Skip to main content

lintel_check/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog::{self, CompiledCatalog};
11use crate::config;
12use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
13use crate::discover;
14use crate::parsers::{self, FileFormat, JsoncParser, Parser};
15use crate::registry;
16use crate::retriever::{CacheStatus, SchemaCache};
17use crate::validation_cache::{self, ValidationCacheStatus, ValidationError};
18
/// Upper bound on the number of files read at the same time.
///
/// Kept deliberately low so that concurrent reads do not exhaust file
/// descriptors: 128 stays well under the default soft limit on macOS (256)
/// and Linux (1024) while still giving good throughput.
const FD_CONCURRENCY_LIMIT: usize = 128;
23
/// Options controlling a validation run.
///
/// `Default` yields an argument set with no globs or excludes, no explicit
/// cache or config directory, every flag `false`, and no schema-cache TTL.
#[derive(Debug, Clone, Default)]
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover)
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
49
50/// Re-exported from [`crate::diagnostics::LintError`] for backwards
51/// compatibility with existing `use lintel_check::validate::LintError` paths.
52pub use crate::diagnostics::LintError;
53
54/// A file that was checked and the schema it resolved to.
55pub struct CheckedFile {
56    pub path: String,
57    pub schema: String,
58    /// `None` for local schemas and builtins; `Some` for remote schemas.
59    pub cache_status: Option<CacheStatus>,
60    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
61    pub validation_cache_status: Option<ValidationCacheStatus>,
62}
63
64/// Result of a validation run.
65pub struct ValidateResult {
66    pub errors: Vec<LintError>,
67    pub checked: Vec<CheckedFile>,
68}
69
70impl ValidateResult {
71    pub fn has_errors(&self) -> bool {
72        !self.errors.is_empty()
73    }
74
75    pub fn files_checked(&self) -> usize {
76        self.checked.len()
77    }
78}
79
80// ---------------------------------------------------------------------------
81// Internal types
82// ---------------------------------------------------------------------------
83
84/// A file that has been parsed and matched to a schema URI.
85struct ParsedFile {
86    path: String,
87    content: String,
88    instance: Value,
89    /// Original schema URI before rewrites (for override matching).
90    original_schema_uri: String,
91}
92
93// ---------------------------------------------------------------------------
94// Config loading
95// ---------------------------------------------------------------------------
96
97/// Locate `lintel.toml`, load the full config, and return the config directory.
98/// Returns `(config, config_dir, config_path)`.  When no config is found or
99/// cwd is unavailable the config is default and `config_path` is `None`.
100#[tracing::instrument(skip_all)]
101pub fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
102    let start_dir = match search_dir {
103        Some(d) => d.to_path_buf(),
104        None => match std::env::current_dir() {
105            Ok(d) => d,
106            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
107        },
108    };
109
110    let Some(config_path) = config::find_config_path(&start_dir) else {
111        return (config::Config::default(), start_dir, None);
112    };
113
114    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
115    let cfg = config::find_and_load(&start_dir)
116        .ok()
117        .flatten()
118        .unwrap_or_default();
119    (cfg, dir, Some(config_path))
120}
121
122// ---------------------------------------------------------------------------
123// File collection
124// ---------------------------------------------------------------------------
125
126/// Collect input files from globs/directories, applying exclude filters.
127#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
128fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
129    if globs.is_empty() {
130        return discover::discover_files(".", exclude);
131    }
132
133    let mut result = Vec::new();
134    for pattern in globs {
135        let path = Path::new(pattern);
136        if path.is_dir() {
137            result.extend(discover::discover_files(pattern, exclude)?);
138        } else {
139            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
140                let path = entry?;
141                if path.is_file() && !is_excluded(&path, exclude) {
142                    result.push(path);
143                }
144            }
145        }
146    }
147    Ok(result)
148}
149
150fn is_excluded(path: &Path, excludes: &[String]) -> bool {
151    let path_str = match path.to_str() {
152        Some(s) => s.strip_prefix("./").unwrap_or(s),
153        None => return false,
154    };
155    excludes
156        .iter()
157        .any(|pattern| glob_match::glob_match(pattern, path_str))
158}
159
160// ---------------------------------------------------------------------------
161// lintel.toml self-validation
162// ---------------------------------------------------------------------------
163
164/// Validate `lintel.toml` against its built-in schema.
165async fn validate_config(
166    config_path: &Path,
167    errors: &mut Vec<LintError>,
168    checked: &mut Vec<CheckedFile>,
169    on_check: &mut impl FnMut(&CheckedFile),
170) -> Result<()> {
171    let content = tokio::fs::read_to_string(config_path).await?;
172    let config_value: Value = toml::from_str(&content)
173        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
174    let schema_value: Value = serde_json::from_str(include_str!(concat!(
175        env!("OUT_DIR"),
176        "/lintel-config.schema.json"
177    )))
178    .context("failed to parse embedded lintel config schema")?;
179    if let Ok(validator) = jsonschema::options().build(&schema_value) {
180        let path_str = config_path.display().to_string();
181        for error in validator.iter_errors(&config_value) {
182            let ip = error.instance_path().to_string();
183            let span = find_instance_path_span(&content, &ip);
184            errors.push(LintError::Config {
185                src: miette::NamedSource::new(&path_str, content.clone()),
186                span: span.into(),
187                path: path_str.clone(),
188                instance_path: if ip.is_empty() {
189                    DEFAULT_LABEL.to_string()
190                } else {
191                    ip
192                },
193                message: clean_error_message(error.to_string()),
194            });
195        }
196        let cf = CheckedFile {
197            path: path_str,
198            schema: "(builtin)".to_string(),
199            cache_status: None,
200            validation_cache_status: None,
201        };
202        on_check(&cf);
203        checked.push(cf);
204    }
205    Ok(())
206}
207
208// ---------------------------------------------------------------------------
209// Phase 1: Parse files and resolve schema URIs
210// ---------------------------------------------------------------------------
211
212/// Try parsing content with each known format, returning the first success.
213///
214/// JSONC is tried first (superset of JSON, handles comments), then YAML and
215/// TOML which cover the most common config formats, followed by the rest.
216pub fn try_parse_all(content: &str, file_name: &str) -> Option<(parsers::FileFormat, Value)> {
217    use parsers::FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
218    const FORMATS: [parsers::FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
219
220    for fmt in FORMATS {
221        let parser = parsers::parser_for(fmt);
222        if let Ok(val) = parser.parse(content, file_name) {
223            return Some((fmt, val));
224        }
225    }
226    None
227}
228
229/// Result of processing a single file: either a parsed file with its schema URI,
230/// a lint error, or nothing (file was skipped).
231enum FileResult {
232    Parsed {
233        schema_uri: String,
234        parsed: ParsedFile,
235    },
236    Error(LintError),
237    Skip,
238}
239
240/// Process a single file's already-read content: parse and resolve schema URI.
241fn process_one_file(
242    path: &Path,
243    content: String,
244    config: &config::Config,
245    config_dir: &Path,
246    compiled_catalogs: &[CompiledCatalog],
247) -> FileResult {
248    let path_str = path.display().to_string();
249    let file_name = path
250        .file_name()
251        .and_then(|n| n.to_str())
252        .unwrap_or(&path_str);
253
254    let detected_format = parsers::detect_format(path);
255
256    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
257    if detected_format.is_none() {
258        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
259            || compiled_catalogs
260                .iter()
261                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
262        if !has_match {
263            return FileResult::Skip;
264        }
265    }
266
267    // Parse the file content.
268    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
269        let parser = parsers::parser_for(fmt);
270        match parser.parse(&content, &path_str) {
271            Ok(val) => (parser, val),
272            Err(parse_err) => {
273                // JSONC fallback for .json files that match a catalog entry.
274                if fmt == FileFormat::Json
275                    && compiled_catalogs
276                        .iter()
277                        .any(|cat| cat.find_schema(&path_str, file_name).is_some())
278                {
279                    match JsoncParser.parse(&content, &path_str) {
280                        Ok(val) => (parsers::parser_for(FileFormat::Jsonc), val),
281                        Err(jsonc_err) => return FileResult::Error(jsonc_err.into()),
282                    }
283                } else {
284                    return FileResult::Error(parse_err.into());
285                }
286            }
287        }
288    } else {
289        match try_parse_all(&content, &path_str) {
290            Some((fmt, val)) => (parsers::parser_for(fmt), val),
291            None => return FileResult::Skip,
292        }
293    };
294
295    // Skip markdown files with no frontmatter
296    if instance.is_null() {
297        return FileResult::Skip;
298    }
299
300    // Schema resolution priority:
301    // 1. Inline $schema / YAML modeline (always wins)
302    // 2. Custom schema mappings from lintel.toml [schemas]
303    // 3. Catalog matching (SchemaStore + additional registries)
304    let schema_uri = parser
305        .extract_schema_uri(&content, &instance)
306        .or_else(|| {
307            config
308                .find_schema_mapping(&path_str, file_name)
309                .map(str::to_string)
310        })
311        .or_else(|| {
312            compiled_catalogs
313                .iter()
314                .find_map(|cat| cat.find_schema(&path_str, file_name))
315                .map(str::to_string)
316        });
317
318    let Some(schema_uri) = schema_uri else {
319        return FileResult::Skip;
320    };
321
322    // Keep original URI for override matching (before rewrites)
323    let original_schema_uri = schema_uri.clone();
324
325    // Apply rewrite rules, then resolve // paths relative to lintel.toml
326    let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
327    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
328
329    // Resolve relative local paths against the file's parent directory.
330    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
331    let schema_uri = if is_remote {
332        schema_uri
333    } else {
334        path.parent()
335            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
336            .unwrap_or(schema_uri)
337    };
338
339    FileResult::Parsed {
340        schema_uri,
341        parsed: ParsedFile {
342            path: path_str,
343            content,
344            instance,
345            original_schema_uri,
346        },
347    }
348}
349
350/// Read each file concurrently with tokio, parse its content, extract its
351/// schema URI, apply rewrites, and group by resolved schema URI.
352#[tracing::instrument(skip_all, fields(file_count = files.len()))]
353async fn parse_and_group_files(
354    files: &[PathBuf],
355    config: &config::Config,
356    config_dir: &Path,
357    compiled_catalogs: &[CompiledCatalog],
358    errors: &mut Vec<LintError>,
359) -> BTreeMap<String, Vec<ParsedFile>> {
360    // Read all files concurrently using tokio async I/O, with a semaphore
361    // to avoid exhausting file descriptors on large directories.
362    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
363    let mut read_set = tokio::task::JoinSet::new();
364    for path in files {
365        let path = path.clone();
366        let sem = semaphore.clone();
367        read_set.spawn(async move {
368            let _permit = sem.acquire().await.expect("semaphore closed");
369            let result = tokio::fs::read_to_string(&path).await;
370            (path, result)
371        });
372    }
373
374    let mut file_contents = Vec::with_capacity(files.len());
375    while let Some(result) = read_set.join_next().await {
376        match result {
377            Ok(item) => file_contents.push(item),
378            Err(e) => tracing::warn!("file read task panicked: {e}"),
379        }
380    }
381
382    // Process files: parse content and resolve schema URIs.
383    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
384    for (path, content_result) in file_contents {
385        let content = match content_result {
386            Ok(c) => c,
387            Err(e) => {
388                errors.push(LintError::Io {
389                    path: path.display().to_string(),
390                    message: format!("failed to read: {e}"),
391                });
392                continue;
393            }
394        };
395        let result = process_one_file(&path, content, config, config_dir, compiled_catalogs);
396        match result {
397            FileResult::Parsed { schema_uri, parsed } => {
398                schema_groups.entry(schema_uri).or_default().push(parsed);
399            }
400            FileResult::Error(e) => errors.push(e),
401            FileResult::Skip => {}
402        }
403    }
404
405    schema_groups
406}
407
408// ---------------------------------------------------------------------------
409// Phase 2: Schema fetching, compilation, and instance validation
410// ---------------------------------------------------------------------------
411
412/// Fetch a schema by URI, returning its parsed JSON and cache status.
413///
414/// For remote URIs, checks the prefetched map first; for local URIs, reads
415/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
416async fn fetch_schema_from_prefetched(
417    schema_uri: &str,
418    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
419    local_cache: &mut HashMap<String, Value>,
420    group: &[ParsedFile],
421    errors: &mut Vec<LintError>,
422    checked: &mut Vec<CheckedFile>,
423    on_check: &mut impl FnMut(&CheckedFile),
424) -> Option<(Value, Option<CacheStatus>)> {
425    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
426
427    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
428        match prefetched.get(schema_uri) {
429            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
430            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
431            None => Err(format!("schema not prefetched: {schema_uri}")),
432        }
433    } else if let Some(cached) = local_cache.get(schema_uri) {
434        Ok((cached.clone(), None))
435    } else {
436        tokio::fs::read_to_string(schema_uri)
437            .await
438            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
439            .and_then(|content| {
440                serde_json::from_str::<Value>(&content)
441                    .map(|v| {
442                        local_cache.insert(schema_uri.to_string(), v.clone());
443                        (v, None)
444                    })
445                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
446            })
447    };
448
449    match result {
450        Ok(value) => Some(value),
451        Err(message) => {
452            report_group_error(
453                |path| LintError::SchemaFetch {
454                    path: path.to_string(),
455                    message: message.clone(),
456                },
457                schema_uri,
458                None,
459                group,
460                errors,
461                checked,
462                on_check,
463            );
464            None
465        }
466    }
467}
468
469/// Report the same error for every file in a schema group.
470fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
471    make_error: impl Fn(&str) -> LintError,
472    schema_uri: &str,
473    cache_status: Option<CacheStatus>,
474    group: &[P],
475    errors: &mut Vec<LintError>,
476    checked: &mut Vec<CheckedFile>,
477    on_check: &mut impl FnMut(&CheckedFile),
478) {
479    for item in group {
480        let pf = item.borrow();
481        let cf = CheckedFile {
482            path: pf.path.clone(),
483            schema: schema_uri.to_string(),
484            cache_status,
485            validation_cache_status: None,
486        };
487        on_check(&cf);
488        checked.push(cf);
489        errors.push(make_error(&pf.path));
490    }
491}
492
493/// Mark every file in a group as checked (no errors).
494fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
495    schema_uri: &str,
496    cache_status: Option<CacheStatus>,
497    validation_cache_status: Option<ValidationCacheStatus>,
498    group: &[P],
499    checked: &mut Vec<CheckedFile>,
500    on_check: &mut impl FnMut(&CheckedFile),
501) {
502    for item in group {
503        let pf = item.borrow();
504        let cf = CheckedFile {
505            path: pf.path.clone(),
506            schema: schema_uri.to_string(),
507            cache_status,
508            validation_cache_status,
509        };
510        on_check(&cf);
511        checked.push(cf);
512    }
513}
514
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// The marker is searched from the end of the message: the dumped value comes
/// first and may itself contain the marker text, so only the last occurrence
/// is the real one.
///
/// All other messages are returned unchanged.
fn clean_error_message(mut msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // Length of the leading " is " that is dropped along with the value.
    const SKIP: usize = " is ".len();

    if let Some(pos) = msg.rfind(MARKER) {
        // `pos + SKIP` lands right before "not valid..."; trimming in place
        // reuses the existing allocation.
        msg.drain(..pos + SKIP);
    }
    msg
}
531
532/// Convert [`ValidationError`]s into [`LintError::Validation`] diagnostics.
533fn push_validation_errors(
534    pf: &ParsedFile,
535    schema_url: &str,
536    validation_errors: &[ValidationError],
537    errors: &mut Vec<LintError>,
538) {
539    for ve in validation_errors {
540        let span = find_instance_path_span(&pf.content, &ve.instance_path);
541        let instance_path = if ve.instance_path.is_empty() {
542            DEFAULT_LABEL.to_string()
543        } else {
544            ve.instance_path.clone()
545        };
546        let label = format_label(&instance_path, &ve.schema_path);
547        let source_span: miette::SourceSpan = span.into();
548        errors.push(LintError::Validation {
549            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
550            span: source_span,
551            schema_span: source_span,
552            path: pf.path.clone(),
553            instance_path,
554            label,
555            message: ve.message.clone(),
556            schema_url: schema_url.to_string(),
557            schema_path: ve.schema_path.clone(),
558        });
559    }
560}
561
562/// Validate all files in a group against an already-compiled validator and store
563/// results in the validation cache.
564#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
565#[allow(clippy::too_many_arguments)]
566async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
567    validator: &jsonschema::Validator,
568    schema_uri: &str,
569    schema_hash: &str,
570    validate_formats: bool,
571    cache_status: Option<CacheStatus>,
572    group: &[P],
573    vcache: &validation_cache::ValidationCache,
574    errors: &mut Vec<LintError>,
575    checked: &mut Vec<CheckedFile>,
576    on_check: &mut impl FnMut(&CheckedFile),
577) {
578    for item in group {
579        let pf = item.borrow();
580        let file_errors: Vec<ValidationError> = validator
581            .iter_errors(&pf.instance)
582            .map(|error| ValidationError {
583                instance_path: error.instance_path().to_string(),
584                message: clean_error_message(error.to_string()),
585                schema_path: error.schema_path().to_string(),
586            })
587            .collect();
588
589        vcache
590            .store(&pf.content, schema_hash, validate_formats, &file_errors)
591            .await;
592        push_validation_errors(pf, schema_uri, &file_errors, errors);
593
594        let cf = CheckedFile {
595            path: pf.path.clone(),
596            schema: schema_uri.to_string(),
597            cache_status,
598            validation_cache_status: Some(ValidationCacheStatus::Miss),
599        };
600        on_check(&cf);
601        checked.push(cf);
602    }
603}
604
605// ---------------------------------------------------------------------------
606// Public API
607// ---------------------------------------------------------------------------
608
609/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
610///
611/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
612pub async fn fetch_compiled_catalogs(
613    retriever: &SchemaCache,
614    config: &config::Config,
615    no_catalog: bool,
616) -> Vec<CompiledCatalog> {
617    let mut compiled_catalogs = Vec::new();
618
619    if !no_catalog {
620        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
621
622        #[allow(clippy::items_after_statements)]
623        type CatalogResult = (
624            String,
625            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
626        );
627        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
628
629        // Lintel catalog
630        if !config.no_default_catalog {
631            let r = retriever.clone();
632            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
633            catalog_tasks.spawn(async move {
634                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
635                    .await
636                    .map(|cat| CompiledCatalog::compile(&cat));
637                (label, result)
638            });
639        }
640
641        // SchemaStore catalog
642        let r = retriever.clone();
643        catalog_tasks.spawn(async move {
644            let result = catalog::fetch_catalog(&r)
645                .await
646                .map(|cat| CompiledCatalog::compile(&cat));
647            ("SchemaStore catalog".to_string(), result)
648        });
649
650        // Additional registries from lintel.toml
651        for registry_url in &config.registries {
652            let r = retriever.clone();
653            let url = registry_url.clone();
654            let label = format!("registry {url}");
655            catalog_tasks.spawn(async move {
656                let result = registry::fetch(&r, &url)
657                    .await
658                    .map(|cat| CompiledCatalog::compile(&cat));
659                (label, result)
660            });
661        }
662
663        while let Some(result) = catalog_tasks.join_next().await {
664            match result {
665                Ok((_, Ok(compiled))) => compiled_catalogs.push(compiled),
666                Ok((label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
667                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
668            }
669        }
670
671        drop(catalog_span);
672    }
673
674    compiled_catalogs
675}
676
677/// # Errors
678///
679/// Returns an error if file collection or schema validation encounters an I/O error.
680pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
681    run_with(args, None, |_| {}).await
682}
683
684/// Like [`run`], but calls `on_check` each time a file is checked, allowing
685/// callers to stream progress (e.g. verbose output) as files are processed.
686///
687/// # Errors
688///
689/// Returns an error if file collection or schema validation encounters an I/O error.
690#[tracing::instrument(skip_all, name = "validate")]
691#[allow(clippy::too_many_lines)]
692pub async fn run_with(
693    args: &ValidateArgs,
694    cache: Option<SchemaCache>,
695    mut on_check: impl FnMut(&CheckedFile),
696) -> Result<ValidateResult> {
697    let retriever = if let Some(c) = cache {
698        c
699    } else {
700        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
701        if let Some(dir) = &args.cache_dir {
702            let path = PathBuf::from(dir);
703            let _ = fs::create_dir_all(&path);
704            builder = builder.cache_dir(path);
705        }
706        if let Some(ttl) = args.schema_cache_ttl {
707            builder = builder.ttl(ttl);
708        }
709        builder.build()
710    };
711
712    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
713    let files = collect_files(&args.globs, &args.exclude)?;
714    tracing::info!(file_count = files.len(), "collected files");
715
716    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
717
718    let mut errors: Vec<LintError> = Vec::new();
719    let mut checked: Vec<CheckedFile> = Vec::new();
720
721    // Validate lintel.toml against its own schema
722    if let Some(config_path) = config_path {
723        validate_config(&config_path, &mut errors, &mut checked, &mut on_check).await?;
724    }
725
726    // Phase 1: Parse files and resolve schema URIs
727    let schema_groups = parse_and_group_files(
728        &files,
729        &config,
730        &config_dir,
731        &compiled_catalogs,
732        &mut errors,
733    )
734    .await;
735    tracing::info!(
736        schema_count = schema_groups.len(),
737        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
738        "grouped files by schema"
739    );
740
741    // Create validation cache
742    let vcache = validation_cache::ValidationCache::new(
743        validation_cache::ensure_cache_dir(),
744        args.force_validation,
745    );
746
747    // Prefetch all remote schemas in parallel
748    let remote_uris: Vec<&String> = schema_groups
749        .keys()
750        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
751        .collect();
752
753    let prefetched = {
754        let _prefetch_span =
755            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
756
757        let mut schema_tasks = tokio::task::JoinSet::new();
758        for uri in remote_uris {
759            let r = retriever.clone();
760            let u = uri.clone();
761            schema_tasks.spawn(async move {
762                let result = r.fetch(&u).await;
763                (u, result)
764            });
765        }
766
767        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
768        while let Some(result) = schema_tasks.join_next().await {
769            match result {
770                Ok((uri, fetch_result)) => {
771                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
772                }
773                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
774            }
775        }
776
777        prefetched
778    };
779
780    // Phase 2: Compile each schema once and validate all matching files
781    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
782    let mut fetch_time = core::time::Duration::ZERO;
783    let mut hash_time = core::time::Duration::ZERO;
784    let mut vcache_time = core::time::Duration::ZERO;
785    let mut compile_time = core::time::Duration::ZERO;
786    let mut validate_time = core::time::Duration::ZERO;
787
788    for (schema_uri, group) in &schema_groups {
789        let _group_span = tracing::debug_span!(
790            "schema_group",
791            schema = schema_uri.as_str(),
792            files = group.len(),
793        )
794        .entered();
795
796        // If ANY file in the group matches a `validate_formats = false` override,
797        // disable format validation for the whole group (they share one compiled validator).
798        let validate_formats = group.iter().all(|pf| {
799            config
800                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
801        });
802
803        // Remote schemas were prefetched in parallel above; local schemas are
804        // read from disk here (with in-memory caching).
805        let t = std::time::Instant::now();
806        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
807            schema_uri,
808            &prefetched,
809            &mut local_schema_cache,
810            group,
811            &mut errors,
812            &mut checked,
813            &mut on_check,
814        )
815        .await
816        else {
817            fetch_time += t.elapsed();
818            continue;
819        };
820        fetch_time += t.elapsed();
821
822        // Pre-compute schema hash once for the entire group.
823        let t = std::time::Instant::now();
824        let schema_hash = validation_cache::schema_hash(&schema_value);
825        hash_time += t.elapsed();
826
827        // Split the group into validation cache hits and misses.
828        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
829
830        let t = std::time::Instant::now();
831        for pf in group {
832            let (cached, vcache_status) = vcache
833                .lookup(&pf.content, &schema_hash, validate_formats)
834                .await;
835
836            if let Some(cached_errors) = cached {
837                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
838                let cf = CheckedFile {
839                    path: pf.path.clone(),
840                    schema: schema_uri.clone(),
841                    cache_status,
842                    validation_cache_status: Some(vcache_status),
843                };
844                on_check(&cf);
845                checked.push(cf);
846            } else {
847                cache_misses.push(pf);
848            }
849        }
850        vcache_time += t.elapsed();
851
852        tracing::debug!(
853            cache_hits = group.len() - cache_misses.len(),
854            cache_misses = cache_misses.len(),
855            "validation cache"
856        );
857
858        // If all files hit the validation cache, skip schema compilation entirely.
859        if cache_misses.is_empty() {
860            continue;
861        }
862
863        // Compile the schema for cache misses.
864        let t = std::time::Instant::now();
865        let validator = {
866            match jsonschema::async_options()
867                .with_retriever(retriever.clone())
868                .should_validate_formats(validate_formats)
869                .build(&schema_value)
870                .await
871            {
872                Ok(v) => v,
873                Err(e) => {
874                    compile_time += t.elapsed();
875                    // When format validation is disabled and the compilation error
876                    // is a uri-reference issue (e.g. Rust-style $ref paths in
877                    // vector.json), skip validation silently.
878                    if !validate_formats && e.to_string().contains("uri-reference") {
879                        mark_group_checked(
880                            schema_uri,
881                            cache_status,
882                            Some(ValidationCacheStatus::Miss),
883                            &cache_misses,
884                            &mut checked,
885                            &mut on_check,
886                        );
887                        continue;
888                    }
889                    let msg = format!("failed to compile schema: {e}");
890                    report_group_error(
891                        |path| LintError::SchemaCompile {
892                            path: path.to_string(),
893                            message: msg.clone(),
894                        },
895                        schema_uri,
896                        cache_status,
897                        &cache_misses,
898                        &mut errors,
899                        &mut checked,
900                        &mut on_check,
901                    );
902                    continue;
903                }
904            }
905        };
906        compile_time += t.elapsed();
907
908        let t = std::time::Instant::now();
909        validate_group(
910            &validator,
911            schema_uri,
912            &schema_hash,
913            validate_formats,
914            cache_status,
915            &cache_misses,
916            &vcache,
917            &mut errors,
918            &mut checked,
919            &mut on_check,
920        )
921        .await;
922        validate_time += t.elapsed();
923    }
924
925    #[allow(clippy::cast_possible_truncation)]
926    {
927        tracing::info!(
928            fetch_ms = fetch_time.as_millis() as u64,
929            hash_ms = hash_time.as_millis() as u64,
930            vcache_ms = vcache_time.as_millis() as u64,
931            compile_ms = compile_time.as_millis() as u64,
932            validate_ms = validate_time.as_millis() as u64,
933            "phase2 breakdown"
934        );
935    }
936
937    // Sort errors for deterministic output (by path, then by span offset)
938    errors.sort_by(|a, b| {
939        a.path()
940            .cmp(b.path())
941            .then_with(|| a.offset().cmp(&b.offset()))
942    });
943
944    Ok(ValidateResult { errors, checked })
945}
946
947#[cfg(test)]
948mod tests {
949    use super::*;
950    use crate::retriever::SchemaCache;
951    use std::path::Path;
952
953    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
954        let cache = SchemaCache::memory();
955        for (uri, body) in entries {
956            cache.insert(
957                uri,
958                serde_json::from_str(body).expect("test mock: invalid JSON"),
959            );
960        }
961        cache
962    }
963
964    fn testdata() -> PathBuf {
965        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
966    }
967
968    /// Build glob patterns that scan one or more testdata directories for all supported file types.
969    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
970        dirs.iter()
971            .flat_map(|dir| {
972                let base = testdata().join(dir);
973                vec![
974                    base.join("*.json").to_string_lossy().to_string(),
975                    base.join("*.yaml").to_string_lossy().to_string(),
976                    base.join("*.yml").to_string_lossy().to_string(),
977                    base.join("*.json5").to_string_lossy().to_string(),
978                    base.join("*.jsonc").to_string_lossy().to_string(),
979                    base.join("*.toml").to_string_lossy().to_string(),
980                ]
981            })
982            .collect()
983    }
984
985    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
986        ValidateArgs {
987            globs: scenario_globs(dirs),
988            exclude: vec![],
989            cache_dir: None,
990            force_schema_fetch: true,
991            force_validation: true,
992            no_catalog: true,
993            config_dir: None,
994            schema_cache_ttl: None,
995        }
996    }
997
    /// JSON Schema shared by most tests: an object with a string `name` property,
    /// where `name` is required.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1000
1001    fn schema_mock() -> SchemaCache {
1002        mock(&[("https://example.com/schema.json", SCHEMA)])
1003    }
1004
1005    // --- Directory scanning tests ---
1006
1007    #[tokio::test]
1008    async fn no_matching_files() -> anyhow::Result<()> {
1009        let tmp = tempfile::tempdir()?;
1010        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1011        let c = ValidateArgs {
1012            globs: vec![pattern],
1013            exclude: vec![],
1014            cache_dir: None,
1015            force_schema_fetch: true,
1016            force_validation: true,
1017            no_catalog: true,
1018            config_dir: None,
1019            schema_cache_ttl: None,
1020        };
1021        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1022        assert!(!result.has_errors());
1023        Ok(())
1024    }
1025
1026    #[tokio::test]
1027    async fn dir_all_valid() -> anyhow::Result<()> {
1028        let c = args_for_dirs(&["positive_tests"]);
1029        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1030        assert!(!result.has_errors());
1031        Ok(())
1032    }
1033
1034    #[tokio::test]
1035    async fn dir_all_invalid() -> anyhow::Result<()> {
1036        let c = args_for_dirs(&["negative_tests"]);
1037        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1038        assert!(result.has_errors());
1039        Ok(())
1040    }
1041
1042    #[tokio::test]
1043    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1044        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1045        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1046        assert!(result.has_errors());
1047        Ok(())
1048    }
1049
1050    #[tokio::test]
1051    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1052        let c = args_for_dirs(&["no_schema"]);
1053        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1054        assert!(!result.has_errors());
1055        Ok(())
1056    }
1057
1058    #[tokio::test]
1059    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1060        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1061        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1062        assert!(!result.has_errors());
1063        Ok(())
1064    }
1065
1066    // --- Directory as positional arg ---
1067
1068    #[tokio::test]
1069    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1070        let dir = testdata().join("positive_tests");
1071        let c = ValidateArgs {
1072            globs: vec![dir.to_string_lossy().to_string()],
1073            exclude: vec![],
1074            cache_dir: None,
1075            force_schema_fetch: true,
1076            force_validation: true,
1077            no_catalog: true,
1078            config_dir: None,
1079            schema_cache_ttl: None,
1080        };
1081        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1082        assert!(!result.has_errors());
1083        assert!(result.files_checked() > 0);
1084        Ok(())
1085    }
1086
1087    #[tokio::test]
1088    async fn multiple_directory_args() -> anyhow::Result<()> {
1089        let pos_dir = testdata().join("positive_tests");
1090        let no_schema_dir = testdata().join("no_schema");
1091        let c = ValidateArgs {
1092            globs: vec![
1093                pos_dir.to_string_lossy().to_string(),
1094                no_schema_dir.to_string_lossy().to_string(),
1095            ],
1096            exclude: vec![],
1097            cache_dir: None,
1098            force_schema_fetch: true,
1099            force_validation: true,
1100            no_catalog: true,
1101            config_dir: None,
1102            schema_cache_ttl: None,
1103        };
1104        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1105        assert!(!result.has_errors());
1106        Ok(())
1107    }
1108
1109    #[tokio::test]
1110    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1111        let dir = testdata().join("positive_tests");
1112        let glob_pattern = testdata()
1113            .join("no_schema")
1114            .join("*.json")
1115            .to_string_lossy()
1116            .to_string();
1117        let c = ValidateArgs {
1118            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1119            exclude: vec![],
1120            cache_dir: None,
1121            force_schema_fetch: true,
1122            force_validation: true,
1123            no_catalog: true,
1124            config_dir: None,
1125            schema_cache_ttl: None,
1126        };
1127        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1128        assert!(!result.has_errors());
1129        Ok(())
1130    }
1131
1132    #[tokio::test]
1133    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1134        let base = testdata().join("malformed");
1135        let c = ValidateArgs {
1136            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1137            exclude: vec![],
1138            cache_dir: None,
1139            force_schema_fetch: true,
1140            force_validation: true,
1141            no_catalog: true,
1142            config_dir: None,
1143            schema_cache_ttl: None,
1144        };
1145        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1146        assert!(result.has_errors());
1147        Ok(())
1148    }
1149
1150    #[tokio::test]
1151    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1152        let base = testdata().join("malformed");
1153        let c = ValidateArgs {
1154            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1155            exclude: vec![],
1156            cache_dir: None,
1157            force_schema_fetch: true,
1158            force_validation: true,
1159            no_catalog: true,
1160            config_dir: None,
1161            schema_cache_ttl: None,
1162        };
1163        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1164        assert!(result.has_errors());
1165        Ok(())
1166    }
1167
1168    // --- Exclude filter ---
1169
1170    #[tokio::test]
1171    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1172        let base = testdata().join("negative_tests");
1173        let c = ValidateArgs {
1174            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1175            exclude: vec![
1176                base.join("missing_name.json").to_string_lossy().to_string(),
1177                base.join("missing_name.toml").to_string_lossy().to_string(),
1178                base.join("missing_name.yaml").to_string_lossy().to_string(),
1179            ],
1180            cache_dir: None,
1181            force_schema_fetch: true,
1182            force_validation: true,
1183            no_catalog: true,
1184            config_dir: None,
1185            schema_cache_ttl: None,
1186        };
1187        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1188        assert!(!result.has_errors());
1189        Ok(())
1190    }
1191
1192    // --- Cache options ---
1193
1194    #[tokio::test]
1195    async fn custom_cache_dir() -> anyhow::Result<()> {
1196        let c = ValidateArgs {
1197            globs: scenario_globs(&["positive_tests"]),
1198            exclude: vec![],
1199            cache_dir: None,
1200            force_schema_fetch: true,
1201            force_validation: true,
1202            no_catalog: true,
1203            config_dir: None,
1204            schema_cache_ttl: None,
1205        };
1206        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1207        assert!(!result.has_errors());
1208        Ok(())
1209    }
1210
1211    // --- Local schema ---
1212
1213    #[tokio::test]
1214    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1215        let tmp = tempfile::tempdir()?;
1216        let schema_path = tmp.path().join("schema.json");
1217        fs::write(&schema_path, SCHEMA)?;
1218
1219        let f = tmp.path().join("valid.json");
1220        fs::write(
1221            &f,
1222            format!(
1223                r#"{{"$schema":"{}","name":"hello"}}"#,
1224                schema_path.to_string_lossy()
1225            ),
1226        )?;
1227
1228        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1229        let c = ValidateArgs {
1230            globs: vec![pattern],
1231            exclude: vec![],
1232            cache_dir: None,
1233            force_schema_fetch: true,
1234            force_validation: true,
1235            no_catalog: true,
1236            config_dir: None,
1237            schema_cache_ttl: None,
1238        };
1239        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1240        assert!(!result.has_errors());
1241        Ok(())
1242    }
1243
1244    #[tokio::test]
1245    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1246        let tmp = tempfile::tempdir()?;
1247        let schema_path = tmp.path().join("schema.json");
1248        fs::write(&schema_path, SCHEMA)?;
1249
1250        let f = tmp.path().join("valid.yaml");
1251        fs::write(
1252            &f,
1253            format!(
1254                "# yaml-language-server: $schema={}\nname: hello\n",
1255                schema_path.to_string_lossy()
1256            ),
1257        )?;
1258
1259        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1260        let c = ValidateArgs {
1261            globs: vec![pattern],
1262            exclude: vec![],
1263            cache_dir: None,
1264            force_schema_fetch: true,
1265            force_validation: true,
1266            no_catalog: true,
1267            config_dir: None,
1268            schema_cache_ttl: None,
1269        };
1270        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1271        assert!(!result.has_errors());
1272        Ok(())
1273    }
1274
1275    #[tokio::test]
1276    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1277        let tmp = tempfile::tempdir()?;
1278        let f = tmp.path().join("ref.json");
1279        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1280
1281        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1282        let c = ValidateArgs {
1283            globs: vec![pattern],
1284            exclude: vec![],
1285            cache_dir: None,
1286            force_schema_fetch: true,
1287            force_validation: true,
1288            no_catalog: true,
1289            config_dir: None,
1290            schema_cache_ttl: None,
1291        };
1292        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1293        assert!(result.has_errors());
1294        Ok(())
1295    }
1296
1297    // --- JSON5 / JSONC tests ---
1298
1299    #[tokio::test]
1300    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1301        let tmp = tempfile::tempdir()?;
1302        let schema_path = tmp.path().join("schema.json");
1303        fs::write(&schema_path, SCHEMA)?;
1304
1305        let f = tmp.path().join("config.json5");
1306        fs::write(
1307            &f,
1308            format!(
1309                r#"{{
1310  // JSON5 comment
1311  "$schema": "{}",
1312  name: "hello",
1313}}"#,
1314                schema_path.to_string_lossy()
1315            ),
1316        )?;
1317
1318        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1319        let c = ValidateArgs {
1320            globs: vec![pattern],
1321            exclude: vec![],
1322            cache_dir: None,
1323            force_schema_fetch: true,
1324            force_validation: true,
1325            no_catalog: true,
1326            config_dir: None,
1327            schema_cache_ttl: None,
1328        };
1329        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1330        assert!(!result.has_errors());
1331        Ok(())
1332    }
1333
1334    #[tokio::test]
1335    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1336        let tmp = tempfile::tempdir()?;
1337        let schema_path = tmp.path().join("schema.json");
1338        fs::write(&schema_path, SCHEMA)?;
1339
1340        let f = tmp.path().join("config.jsonc");
1341        fs::write(
1342            &f,
1343            format!(
1344                r#"{{
1345  /* JSONC comment */
1346  "$schema": "{}",
1347  "name": "hello"
1348}}"#,
1349                schema_path.to_string_lossy()
1350            ),
1351        )?;
1352
1353        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1354        let c = ValidateArgs {
1355            globs: vec![pattern],
1356            exclude: vec![],
1357            cache_dir: None,
1358            force_schema_fetch: true,
1359            force_validation: true,
1360            no_catalog: true,
1361            config_dir: None,
1362            schema_cache_ttl: None,
1363        };
1364        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1365        assert!(!result.has_errors());
1366        Ok(())
1367    }
1368
1369    // --- Catalog-based schema matching ---
1370
    /// Minimal stand-in for the GitHub workflow schema used by the catalog tests:
    /// an object with a string `name`, an unconstrained `on`, and an object `jobs`,
    /// where `on` and `jobs` are required.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1380
1381    fn gh_catalog_json() -> String {
1382        r#"{"schemas":[{
1383            "name": "GitHub Workflow",
1384            "url": "https://www.schemastore.org/github-workflow.json",
1385            "fileMatch": [
1386                "**/.github/workflows/*.yml",
1387                "**/.github/workflows/*.yaml"
1388            ]
1389        }]}"#
1390            .to_string()
1391    }
1392
1393    #[tokio::test]
1394    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1395        let tmp = tempfile::tempdir()?;
1396        let cache_tmp = tempfile::tempdir()?;
1397        let wf_dir = tmp.path().join(".github/workflows");
1398        fs::create_dir_all(&wf_dir)?;
1399        fs::write(
1400            wf_dir.join("ci.yml"),
1401            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1402        )?;
1403
1404        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1405        let client = mock(&[
1406            (
1407                "https://www.schemastore.org/api/json/catalog.json",
1408                &gh_catalog_json(),
1409            ),
1410            (
1411                "https://www.schemastore.org/github-workflow.json",
1412                GH_WORKFLOW_SCHEMA,
1413            ),
1414        ]);
1415        let c = ValidateArgs {
1416            globs: vec![pattern],
1417            exclude: vec![],
1418            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1419            force_schema_fetch: true,
1420            force_validation: true,
1421            no_catalog: false,
1422            config_dir: None,
1423            schema_cache_ttl: None,
1424        };
1425        let result = run_with(&c, Some(client), |_| {}).await?;
1426        assert!(!result.has_errors());
1427        Ok(())
1428    }
1429
1430    #[tokio::test]
1431    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1432        let tmp = tempfile::tempdir()?;
1433        let cache_tmp = tempfile::tempdir()?;
1434        let wf_dir = tmp.path().join(".github/workflows");
1435        fs::create_dir_all(&wf_dir)?;
1436        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1437
1438        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1439        let client = mock(&[
1440            (
1441                "https://www.schemastore.org/api/json/catalog.json",
1442                &gh_catalog_json(),
1443            ),
1444            (
1445                "https://www.schemastore.org/github-workflow.json",
1446                GH_WORKFLOW_SCHEMA,
1447            ),
1448        ]);
1449        let c = ValidateArgs {
1450            globs: vec![pattern],
1451            exclude: vec![],
1452            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1453            force_schema_fetch: true,
1454            force_validation: true,
1455            no_catalog: false,
1456            config_dir: None,
1457            schema_cache_ttl: None,
1458        };
1459        let result = run_with(&c, Some(client), |_| {}).await?;
1460        assert!(result.has_errors());
1461        Ok(())
1462    }
1463
1464    #[tokio::test]
1465    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1466        let tmp = tempfile::tempdir()?;
1467        let cache_tmp = tempfile::tempdir()?;
1468        let wf_dir = tmp.path().join(".github/workflows");
1469        fs::create_dir_all(&wf_dir)?;
1470        fs::write(
1471            wf_dir.join("ci.yml"),
1472            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1473        )?;
1474
1475        let client = mock(&[
1476            (
1477                "https://www.schemastore.org/api/json/catalog.json",
1478                &gh_catalog_json(),
1479            ),
1480            (
1481                "https://www.schemastore.org/github-workflow.json",
1482                GH_WORKFLOW_SCHEMA,
1483            ),
1484        ]);
1485        let c = ValidateArgs {
1486            globs: vec![],
1487            exclude: vec![],
1488            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1489            force_schema_fetch: true,
1490            force_validation: true,
1491            no_catalog: false,
1492            config_dir: None,
1493            schema_cache_ttl: None,
1494        };
1495
1496        let orig_dir = std::env::current_dir()?;
1497        std::env::set_current_dir(tmp.path())?;
1498        let result = run_with(&c, Some(client), |_| {}).await?;
1499        std::env::set_current_dir(orig_dir)?;
1500
1501        assert!(!result.has_errors());
1502        Ok(())
1503    }
1504
1505    // --- TOML tests ---
1506
1507    #[tokio::test]
1508    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1509        let tmp = tempfile::tempdir()?;
1510        let schema_path = tmp.path().join("schema.json");
1511        fs::write(&schema_path, SCHEMA)?;
1512
1513        let f = tmp.path().join("config.toml");
1514        fs::write(
1515            &f,
1516            format!(
1517                "# :schema {}\nname = \"hello\"\n",
1518                schema_path.to_string_lossy()
1519            ),
1520        )?;
1521
1522        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1523        let c = ValidateArgs {
1524            globs: vec![pattern],
1525            exclude: vec![],
1526            cache_dir: None,
1527            force_schema_fetch: true,
1528            force_validation: true,
1529            no_catalog: true,
1530            config_dir: None,
1531            schema_cache_ttl: None,
1532        };
1533        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1534        assert!(!result.has_errors());
1535        Ok(())
1536    }
1537
1538    // --- Rewrite rules + // resolution ---
1539
1540    #[tokio::test]
1541    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1542        let tmp = tempfile::tempdir()?;
1543
1544        let schemas_dir = tmp.path().join("schemas");
1545        fs::create_dir_all(&schemas_dir)?;
1546        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1547
1548        fs::write(
1549            tmp.path().join("lintel.toml"),
1550            r#"
1551[rewrite]
1552"http://localhost:9000/" = "//schemas/"
1553"#,
1554        )?;
1555
1556        let f = tmp.path().join("config.json");
1557        fs::write(
1558            &f,
1559            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1560        )?;
1561
1562        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1563        let c = ValidateArgs {
1564            globs: vec![pattern],
1565            exclude: vec![],
1566            cache_dir: None,
1567            force_schema_fetch: true,
1568            force_validation: true,
1569            no_catalog: true,
1570            config_dir: Some(tmp.path().to_path_buf()),
1571            schema_cache_ttl: None,
1572        };
1573
1574        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1575        assert!(!result.has_errors());
1576        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1577        Ok(())
1578    }
1579
1580    #[tokio::test]
1581    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1582        let tmp = tempfile::tempdir()?;
1583
1584        let schemas_dir = tmp.path().join("schemas");
1585        fs::create_dir_all(&schemas_dir)?;
1586        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1587
1588        fs::write(tmp.path().join("lintel.toml"), "")?;
1589
1590        let sub = tmp.path().join("deeply/nested");
1591        fs::create_dir_all(&sub)?;
1592        let f = sub.join("config.json");
1593        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1594
1595        let pattern = sub.join("*.json").to_string_lossy().to_string();
1596        let c = ValidateArgs {
1597            globs: vec![pattern],
1598            exclude: vec![],
1599            cache_dir: None,
1600            force_schema_fetch: true,
1601            force_validation: true,
1602            no_catalog: true,
1603            config_dir: Some(tmp.path().to_path_buf()),
1604            schema_cache_ttl: None,
1605        };
1606
1607        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1608        assert!(!result.has_errors());
1609        Ok(())
1610    }
1611
1612    // --- Format validation override ---
1613
    /// Schema for the format-override tests: an object whose string property `link`
    /// carries `"format": "uri-reference"`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1620
1621    #[tokio::test]
1622    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1623        let tmp = tempfile::tempdir()?;
1624        let schema_path = tmp.path().join("schema.json");
1625        fs::write(&schema_path, FORMAT_SCHEMA)?;
1626
1627        let f = tmp.path().join("data.json");
1628        fs::write(
1629            &f,
1630            format!(
1631                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1632                schema_path.to_string_lossy()
1633            ),
1634        )?;
1635
1636        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1637        let c = ValidateArgs {
1638            globs: vec![pattern],
1639            exclude: vec![],
1640            cache_dir: None,
1641            force_schema_fetch: true,
1642            force_validation: true,
1643            no_catalog: true,
1644            config_dir: Some(tmp.path().to_path_buf()),
1645            schema_cache_ttl: None,
1646        };
1647        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1648        assert!(
1649            result.has_errors(),
1650            "expected format error without override"
1651        );
1652        Ok(())
1653    }
1654
1655    #[tokio::test]
1656    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1657        let tmp = tempfile::tempdir()?;
1658        let schema_path = tmp.path().join("schema.json");
1659        fs::write(&schema_path, FORMAT_SCHEMA)?;
1660
1661        let f = tmp.path().join("data.json");
1662        fs::write(
1663            &f,
1664            format!(
1665                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1666                schema_path.to_string_lossy()
1667            ),
1668        )?;
1669
1670        // Use **/data.json to match the absolute path from the tempdir.
1671        fs::write(
1672            tmp.path().join("lintel.toml"),
1673            r#"
1674[[override]]
1675files = ["**/data.json"]
1676validate_formats = false
1677"#,
1678        )?;
1679
1680        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1681        let c = ValidateArgs {
1682            globs: vec![pattern],
1683            exclude: vec![],
1684            cache_dir: None,
1685            force_schema_fetch: true,
1686            force_validation: true,
1687            no_catalog: true,
1688            config_dir: Some(tmp.path().to_path_buf()),
1689            schema_cache_ttl: None,
1690        };
1691        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1692        assert!(
1693            !result.has_errors(),
1694            "expected no errors with validate_formats = false override"
1695        );
1696        Ok(())
1697    }
1698
1699    // --- Unrecognized extension handling ---
1700
1701    #[tokio::test]
1702    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1703        let tmp = tempfile::tempdir()?;
1704        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1705
1706        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1707        let c = ValidateArgs {
1708            globs: vec![pattern],
1709            exclude: vec![],
1710            cache_dir: None,
1711            force_schema_fetch: true,
1712            force_validation: true,
1713            no_catalog: true,
1714            config_dir: Some(tmp.path().to_path_buf()),
1715            schema_cache_ttl: None,
1716        };
1717        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1718        assert!(!result.has_errors());
1719        assert_eq!(result.files_checked(), 0);
1720        Ok(())
1721    }
1722
1723    #[tokio::test]
1724    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1725        let tmp = tempfile::tempdir()?;
1726        let cache_tmp = tempfile::tempdir()?;
1727        // File has .cfg extension (unrecognized) but content is valid JSON
1728        fs::write(
1729            tmp.path().join("myapp.cfg"),
1730            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1731        )?;
1732
1733        let catalog_json = r#"{"schemas":[{
1734            "name": "MyApp Config",
1735            "url": "https://example.com/myapp.schema.json",
1736            "fileMatch": ["*.cfg"]
1737        }]}"#;
1738        let schema =
1739            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1740
1741        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1742        let client = mock(&[
1743            (
1744                "https://www.schemastore.org/api/json/catalog.json",
1745                catalog_json,
1746            ),
1747            ("https://example.com/myapp.schema.json", schema),
1748        ]);
1749        let c = ValidateArgs {
1750            globs: vec![pattern],
1751            exclude: vec![],
1752            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1753            force_schema_fetch: true,
1754            force_validation: true,
1755            no_catalog: false,
1756            config_dir: Some(tmp.path().to_path_buf()),
1757            schema_cache_ttl: None,
1758        };
1759        let result = run_with(&c, Some(client), |_| {}).await?;
1760        assert!(!result.has_errors());
1761        assert_eq!(result.files_checked(), 1);
1762        Ok(())
1763    }
1764
1765    #[tokio::test]
1766    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1767        let tmp = tempfile::tempdir()?;
1768        let cache_tmp = tempfile::tempdir()?;
1769        // File matches catalog but content isn't parseable by any format
1770        fs::write(
1771            tmp.path().join("myapp.cfg"),
1772            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1773        )?;
1774
1775        let catalog_json = r#"{"schemas":[{
1776            "name": "MyApp Config",
1777            "url": "https://example.com/myapp.schema.json",
1778            "fileMatch": ["*.cfg"]
1779        }]}"#;
1780
1781        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1782        let client = mock(&[(
1783            "https://www.schemastore.org/api/json/catalog.json",
1784            catalog_json,
1785        )]);
1786        let c = ValidateArgs {
1787            globs: vec![pattern],
1788            exclude: vec![],
1789            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1790            force_schema_fetch: true,
1791            force_validation: true,
1792            no_catalog: false,
1793            config_dir: Some(tmp.path().to_path_buf()),
1794            schema_cache_ttl: None,
1795        };
1796        let result = run_with(&c, Some(client), |_| {}).await?;
1797        assert!(!result.has_errors());
1798        assert_eq!(result.files_checked(), 0);
1799        Ok(())
1800    }
1801
1802    #[tokio::test]
1803    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1804        let tmp = tempfile::tempdir()?;
1805        let cache_tmp = tempfile::tempdir()?;
1806        // File has .cfg extension, content is valid JSON but fails schema validation
1807        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1808
1809        let catalog_json = r#"{"schemas":[{
1810            "name": "MyApp Config",
1811            "url": "https://example.com/myapp.schema.json",
1812            "fileMatch": ["*.cfg"]
1813        }]}"#;
1814        let schema =
1815            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1816
1817        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1818        let client = mock(&[
1819            (
1820                "https://www.schemastore.org/api/json/catalog.json",
1821                catalog_json,
1822            ),
1823            ("https://example.com/myapp.schema.json", schema),
1824        ]);
1825        let c = ValidateArgs {
1826            globs: vec![pattern],
1827            exclude: vec![],
1828            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1829            force_schema_fetch: true,
1830            force_validation: true,
1831            no_catalog: false,
1832            config_dir: Some(tmp.path().to_path_buf()),
1833            schema_cache_ttl: None,
1834        };
1835        let result = run_with(&c, Some(client), |_| {}).await?;
1836        assert!(result.has_errors());
1837        assert_eq!(result.files_checked(), 1);
1838        Ok(())
1839    }
1840
1841    // --- Validation cache ---
1842
1843    #[tokio::test]
1844    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1845        let tmp = tempfile::tempdir()?;
1846        let schema_path = tmp.path().join("schema.json");
1847        fs::write(&schema_path, SCHEMA)?;
1848
1849        let f = tmp.path().join("valid.json");
1850        fs::write(
1851            &f,
1852            format!(
1853                r#"{{"$schema":"{}","name":"hello"}}"#,
1854                schema_path.to_string_lossy()
1855            ),
1856        )?;
1857
1858        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1859
1860        // First run: force_validation = false so results get cached
1861        let c = ValidateArgs {
1862            globs: vec![pattern.clone()],
1863            exclude: vec![],
1864            cache_dir: None,
1865            force_schema_fetch: true,
1866            force_validation: false,
1867            no_catalog: true,
1868            config_dir: None,
1869            schema_cache_ttl: None,
1870        };
1871        let mut first_statuses = Vec::new();
1872        let result = run_with(&c, Some(mock(&[])), |cf| {
1873            first_statuses.push(cf.validation_cache_status);
1874        })
1875        .await?;
1876        assert!(!result.has_errors());
1877        assert!(result.files_checked() > 0);
1878
1879        // Verify the first run recorded a validation cache miss
1880        assert!(
1881            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
1882            "expected at least one validation cache miss on first run"
1883        );
1884
1885        // Second run: same file, same schema — should hit validation cache
1886        let mut second_statuses = Vec::new();
1887        let result = run_with(&c, Some(mock(&[])), |cf| {
1888            second_statuses.push(cf.validation_cache_status);
1889        })
1890        .await?;
1891        assert!(!result.has_errors());
1892
1893        // Verify the second run got a validation cache hit
1894        assert!(
1895            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
1896            "expected at least one validation cache hit on second run"
1897        );
1898        Ok(())
1899    }
1900
1901    // --- clean_error_message ---
1902
1903    #[test]
1904    fn clean_strips_anyof_value() {
1905        let msg =
1906            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
1907        assert_eq!(
1908            clean_error_message(msg.to_string()),
1909            "not valid under any of the schemas listed in the 'anyOf' keyword"
1910        );
1911    }
1912
1913    #[test]
1914    fn clean_strips_oneof_value() {
1915        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
1916        assert_eq!(
1917            clean_error_message(msg.to_string()),
1918            "not valid under any of the schemas listed in the 'oneOf' keyword"
1919        );
1920    }
1921
1922    #[test]
1923    fn clean_strips_long_value() {
1924        let long_value = "x".repeat(5000);
1925        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
1926        let msg = format!("{long_value}{suffix}");
1927        assert_eq!(
1928            clean_error_message(msg),
1929            "not valid under any of the schemas listed in the 'anyOf' keyword"
1930        );
1931    }
1932
1933    #[test]
1934    fn clean_preserves_type_error() {
1935        let msg = r#"12345 is not of types "null", "string""#;
1936        assert_eq!(clean_error_message(msg.to_string()), msg);
1937    }
1938
1939    #[test]
1940    fn clean_preserves_required_property() {
1941        let msg = "\"name\" is a required property";
1942        assert_eq!(clean_error_message(msg.to_string()), msg);
1943    }
1944}