Skip to main content

lintel_check/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog::{self, CompiledCatalog};
11use crate::config;
12use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
13use crate::discover;
14use crate::parsers::{self, FileFormat, JsoncParser, Parser};
15use crate::registry;
16use crate::retriever::{CacheStatus, HttpClient, SchemaCache, ensure_cache_dir};
17use crate::validation_cache::{self, ValidationCacheStatus, ValidationError};
18
/// Maximum number of files that are read from disk at the same time.
///
/// Conservative limit for concurrent file reads to avoid exhausting file
/// descriptors. 128 is well below the default soft limit on macOS (256) and
/// Linux (1024) while still providing good throughput.
const FD_CONCURRENCY_LIMIT: usize = 128;
23
/// Options for a validation run, consumed by [`run`] and [`run_with`].
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover).
    ///
    /// An entry that names an existing directory is searched with file
    /// discovery instead of being expanded as a glob.
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas.
    ///
    /// When `None`, the default schema cache directory is used.
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
49
50/// Re-exported from [`crate::diagnostics::LintError`] for backwards
51/// compatibility with existing `use lintel_check::validate::LintError` paths.
52pub use crate::diagnostics::LintError;
53
/// A file that was checked and the schema it resolved to.
///
/// Each value is handed to the `on_check` callback and then appended to the
/// list of checked files.
pub struct CheckedFile {
    /// Display form of the checked file's path.
    pub path: String,
    /// Schema the file was checked against: the resolved schema URI, or
    /// `"(builtin)"` when `lintel.toml` is validated against its embedded schema.
    pub schema: String,
    /// `None` for local schemas and builtins; `Some` for remote schemas.
    pub cache_status: Option<CacheStatus>,
    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
63
/// Result of a validation run.
pub struct ValidateResult {
    /// Diagnostics collected during the run; empty when nothing failed.
    pub errors: Vec<LintError>,
    /// One entry per file that was checked.
    pub checked: Vec<CheckedFile>,
}
69
70impl ValidateResult {
71    pub fn has_errors(&self) -> bool {
72        !self.errors.is_empty()
73    }
74
75    pub fn files_checked(&self) -> usize {
76        self.checked.len()
77    }
78}
79
80// ---------------------------------------------------------------------------
81// Internal types
82// ---------------------------------------------------------------------------
83
/// A file that has been parsed and matched to a schema URI.
struct ParsedFile {
    /// Display form of the file's path.
    path: String,
    /// Text content of the file; used for diagnostics spans and as the
    /// validation cache key input.
    content: String,
    /// Parsed value of the file that is validated against the schema.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
92
93// ---------------------------------------------------------------------------
94// Config loading
95// ---------------------------------------------------------------------------
96
97/// Locate `lintel.toml`, load the full config, and return the config directory.
98/// Returns `(config, config_dir, config_path)`.  When no config is found or
99/// cwd is unavailable the config is default and `config_path` is `None`.
100#[tracing::instrument(skip_all)]
101pub fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
102    let start_dir = match search_dir {
103        Some(d) => d.to_path_buf(),
104        None => match std::env::current_dir() {
105            Ok(d) => d,
106            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
107        },
108    };
109
110    let Some(config_path) = config::find_config_path(&start_dir) else {
111        return (config::Config::default(), start_dir, None);
112    };
113
114    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
115    let cfg = config::find_and_load(&start_dir)
116        .ok()
117        .flatten()
118        .unwrap_or_default();
119    (cfg, dir, Some(config_path))
120}
121
122// ---------------------------------------------------------------------------
123// File collection
124// ---------------------------------------------------------------------------
125
126/// Collect input files from globs/directories, applying exclude filters.
127#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
128fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
129    if globs.is_empty() {
130        return discover::discover_files(".", exclude);
131    }
132
133    let mut result = Vec::new();
134    for pattern in globs {
135        let path = Path::new(pattern);
136        if path.is_dir() {
137            result.extend(discover::discover_files(pattern, exclude)?);
138        } else {
139            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
140                let path = entry?;
141                if path.is_file() && !is_excluded(&path, exclude) {
142                    result.push(path);
143                }
144            }
145        }
146    }
147    Ok(result)
148}
149
150fn is_excluded(path: &Path, excludes: &[String]) -> bool {
151    let path_str = match path.to_str() {
152        Some(s) => s.strip_prefix("./").unwrap_or(s),
153        None => return false,
154    };
155    excludes
156        .iter()
157        .any(|pattern| glob_match::glob_match(pattern, path_str))
158}
159
160// ---------------------------------------------------------------------------
161// lintel.toml self-validation
162// ---------------------------------------------------------------------------
163
164/// Validate `lintel.toml` against its built-in schema.
165async fn validate_config(
166    config_path: &Path,
167    errors: &mut Vec<LintError>,
168    checked: &mut Vec<CheckedFile>,
169    on_check: &mut impl FnMut(&CheckedFile),
170) -> Result<()> {
171    let content = tokio::fs::read_to_string(config_path).await?;
172    let config_value: Value = toml::from_str(&content)
173        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
174    let schema_value: Value = serde_json::from_str(include_str!(concat!(
175        env!("OUT_DIR"),
176        "/lintel-config.schema.json"
177    )))
178    .context("failed to parse embedded lintel config schema")?;
179    if let Ok(validator) = jsonschema::options().build(&schema_value) {
180        let path_str = config_path.display().to_string();
181        for error in validator.iter_errors(&config_value) {
182            let ip = error.instance_path().to_string();
183            let span = find_instance_path_span(&content, &ip);
184            errors.push(LintError::Config {
185                src: miette::NamedSource::new(&path_str, content.clone()),
186                span: span.into(),
187                path: path_str.clone(),
188                instance_path: if ip.is_empty() {
189                    DEFAULT_LABEL.to_string()
190                } else {
191                    ip
192                },
193                message: clean_error_message(error.to_string()),
194            });
195        }
196        let cf = CheckedFile {
197            path: path_str,
198            schema: "(builtin)".to_string(),
199            cache_status: None,
200            validation_cache_status: None,
201        };
202        on_check(&cf);
203        checked.push(cf);
204    }
205    Ok(())
206}
207
208// ---------------------------------------------------------------------------
209// Phase 1: Parse files and resolve schema URIs
210// ---------------------------------------------------------------------------
211
212/// Try parsing content with each known format, returning the first success.
213///
214/// JSONC is tried first (superset of JSON, handles comments), then YAML and
215/// TOML which cover the most common config formats, followed by the rest.
216pub fn try_parse_all(content: &str, file_name: &str) -> Option<(parsers::FileFormat, Value)> {
217    use parsers::FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
218    const FORMATS: [parsers::FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
219
220    for fmt in FORMATS {
221        let parser = parsers::parser_for(fmt);
222        if let Ok(val) = parser.parse(content, file_name) {
223            return Some((fmt, val));
224        }
225    }
226    None
227}
228
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// The file parsed and a schema was resolved for it.
    Parsed {
        /// Schema URI after rewrites and path resolution; files are grouped
        /// by this value.
        schema_uri: String,
        /// The parsed file together with its content and original schema URI.
        parsed: ParsedFile,
    },
    /// The file could not be parsed; the parse error converted to a diagnostic.
    Error(LintError),
    /// The file is not validated (unrecognized and unmatched, unparseable in
    /// any format, null instance, or no schema could be resolved).
    Skip,
}
239
240/// Process a single file's already-read content: parse and resolve schema URI.
241fn process_one_file(
242    path: &Path,
243    content: String,
244    config: &config::Config,
245    config_dir: &Path,
246    compiled_catalogs: &[CompiledCatalog],
247) -> FileResult {
248    let path_str = path.display().to_string();
249    let file_name = path
250        .file_name()
251        .and_then(|n| n.to_str())
252        .unwrap_or(&path_str);
253
254    let detected_format = parsers::detect_format(path);
255
256    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
257    if detected_format.is_none() {
258        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
259            || compiled_catalogs
260                .iter()
261                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
262        if !has_match {
263            return FileResult::Skip;
264        }
265    }
266
267    // Parse the file content.
268    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
269        let parser = parsers::parser_for(fmt);
270        match parser.parse(&content, &path_str) {
271            Ok(val) => (parser, val),
272            Err(parse_err) => {
273                // JSONC fallback for .json files that match a catalog entry.
274                if fmt == FileFormat::Json
275                    && compiled_catalogs
276                        .iter()
277                        .any(|cat| cat.find_schema(&path_str, file_name).is_some())
278                {
279                    match JsoncParser.parse(&content, &path_str) {
280                        Ok(val) => (parsers::parser_for(FileFormat::Jsonc), val),
281                        Err(jsonc_err) => return FileResult::Error(jsonc_err.into()),
282                    }
283                } else {
284                    return FileResult::Error(parse_err.into());
285                }
286            }
287        }
288    } else {
289        match try_parse_all(&content, &path_str) {
290            Some((fmt, val)) => (parsers::parser_for(fmt), val),
291            None => return FileResult::Skip,
292        }
293    };
294
295    // Skip markdown files with no frontmatter
296    if instance.is_null() {
297        return FileResult::Skip;
298    }
299
300    // Schema resolution priority:
301    // 1. Inline $schema / YAML modeline (always wins)
302    // 2. Custom schema mappings from lintel.toml [schemas]
303    // 3. Catalog matching (SchemaStore + additional registries)
304    let schema_uri = parser
305        .extract_schema_uri(&content, &instance)
306        .or_else(|| {
307            config
308                .find_schema_mapping(&path_str, file_name)
309                .map(str::to_string)
310        })
311        .or_else(|| {
312            compiled_catalogs
313                .iter()
314                .find_map(|cat| cat.find_schema(&path_str, file_name))
315                .map(str::to_string)
316        });
317
318    let Some(schema_uri) = schema_uri else {
319        return FileResult::Skip;
320    };
321
322    // Keep original URI for override matching (before rewrites)
323    let original_schema_uri = schema_uri.clone();
324
325    // Apply rewrite rules, then resolve // paths relative to lintel.toml
326    let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
327    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
328
329    // Resolve relative local paths against the file's parent directory.
330    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
331    let schema_uri = if is_remote {
332        schema_uri
333    } else {
334        path.parent()
335            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
336            .unwrap_or(schema_uri)
337    };
338
339    FileResult::Parsed {
340        schema_uri,
341        parsed: ParsedFile {
342            path: path_str,
343            content,
344            instance,
345            original_schema_uri,
346        },
347    }
348}
349
350/// Read each file concurrently with tokio, parse its content, extract its
351/// schema URI, apply rewrites, and group by resolved schema URI.
352#[tracing::instrument(skip_all, fields(file_count = files.len()))]
353async fn parse_and_group_files(
354    files: &[PathBuf],
355    config: &config::Config,
356    config_dir: &Path,
357    compiled_catalogs: &[CompiledCatalog],
358    errors: &mut Vec<LintError>,
359) -> BTreeMap<String, Vec<ParsedFile>> {
360    // Read all files concurrently using tokio async I/O, with a semaphore
361    // to avoid exhausting file descriptors on large directories.
362    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
363    let mut read_set = tokio::task::JoinSet::new();
364    for path in files {
365        let path = path.clone();
366        let sem = semaphore.clone();
367        read_set.spawn(async move {
368            let _permit = sem.acquire().await.expect("semaphore closed");
369            let result = tokio::fs::read_to_string(&path).await;
370            (path, result)
371        });
372    }
373
374    let mut file_contents = Vec::with_capacity(files.len());
375    while let Some(result) = read_set.join_next().await {
376        match result {
377            Ok(item) => file_contents.push(item),
378            Err(e) => tracing::warn!("file read task panicked: {e}"),
379        }
380    }
381
382    // Process files: parse content and resolve schema URIs.
383    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
384    for (path, content_result) in file_contents {
385        let content = match content_result {
386            Ok(c) => c,
387            Err(e) => {
388                errors.push(LintError::Io {
389                    path: path.display().to_string(),
390                    message: format!("failed to read: {e}"),
391                });
392                continue;
393            }
394        };
395        let result = process_one_file(&path, content, config, config_dir, compiled_catalogs);
396        match result {
397            FileResult::Parsed { schema_uri, parsed } => {
398                schema_groups.entry(schema_uri).or_default().push(parsed);
399            }
400            FileResult::Error(e) => errors.push(e),
401            FileResult::Skip => {}
402        }
403    }
404
405    schema_groups
406}
407
408// ---------------------------------------------------------------------------
409// Phase 2: Schema fetching, compilation, and instance validation
410// ---------------------------------------------------------------------------
411
412/// Fetch a schema by URI, returning its parsed JSON and cache status.
413///
414/// For remote URIs, checks the prefetched map first; for local URIs, reads
415/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
416async fn fetch_schema_from_prefetched(
417    schema_uri: &str,
418    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
419    local_cache: &mut HashMap<String, Value>,
420    group: &[ParsedFile],
421    errors: &mut Vec<LintError>,
422    checked: &mut Vec<CheckedFile>,
423    on_check: &mut impl FnMut(&CheckedFile),
424) -> Option<(Value, Option<CacheStatus>)> {
425    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
426
427    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
428        match prefetched.get(schema_uri) {
429            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
430            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
431            None => Err(format!("schema not prefetched: {schema_uri}")),
432        }
433    } else if let Some(cached) = local_cache.get(schema_uri) {
434        Ok((cached.clone(), None))
435    } else {
436        tokio::fs::read_to_string(schema_uri)
437            .await
438            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
439            .and_then(|content| {
440                serde_json::from_str::<Value>(&content)
441                    .map(|v| {
442                        local_cache.insert(schema_uri.to_string(), v.clone());
443                        (v, None)
444                    })
445                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
446            })
447    };
448
449    match result {
450        Ok(value) => Some(value),
451        Err(message) => {
452            report_group_error(
453                |path| LintError::SchemaFetch {
454                    path: path.to_string(),
455                    message: message.clone(),
456                },
457                schema_uri,
458                None,
459                group,
460                errors,
461                checked,
462                on_check,
463            );
464            None
465        }
466    }
467}
468
469/// Report the same error for every file in a schema group.
470fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
471    make_error: impl Fn(&str) -> LintError,
472    schema_uri: &str,
473    cache_status: Option<CacheStatus>,
474    group: &[P],
475    errors: &mut Vec<LintError>,
476    checked: &mut Vec<CheckedFile>,
477    on_check: &mut impl FnMut(&CheckedFile),
478) {
479    for item in group {
480        let pf = item.borrow();
481        let cf = CheckedFile {
482            path: pf.path.clone(),
483            schema: schema_uri.to_string(),
484            cache_status,
485            validation_cache_status: None,
486        };
487        on_check(&cf);
488        checked.push(cf);
489        errors.push(make_error(&pf.path));
490    }
491}
492
493/// Mark every file in a group as checked (no errors).
494fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
495    schema_uri: &str,
496    cache_status: Option<CacheStatus>,
497    validation_cache_status: Option<ValidationCacheStatus>,
498    group: &[P],
499    checked: &mut Vec<CheckedFile>,
500    on_check: &mut impl FnMut(&CheckedFile),
501) {
502    for item in group {
503        let pf = item.borrow();
504        let cf = CheckedFile {
505            path: pf.path.clone(),
506            schema: schema_uri.to_string(),
507            cache_status,
508            validation_cache_status,
509        };
510        on_check(&cf);
511        checked.push(cf);
512    }
513}
514
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// The dumped value comes first and may itself contain the marker text (for
/// example inside a string), so the *last* occurrence of the marker is the one
/// that belongs to the message proper.
///
/// All other messages are returned unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // Length of the leading " is " that is dropped along with the dumped value.
    const SKIP: usize = " is ".len();

    match msg.rfind(MARKER) {
        // `pos + SKIP` lands just after ASCII text, so it is a char boundary.
        Some(pos) => msg[pos + SKIP..].to_string(),
        None => msg,
    }
}
531
532/// Convert [`ValidationError`]s into [`LintError::Validation`] diagnostics.
533fn push_validation_errors(
534    pf: &ParsedFile,
535    schema_url: &str,
536    validation_errors: &[ValidationError],
537    errors: &mut Vec<LintError>,
538) {
539    for ve in validation_errors {
540        let span = find_instance_path_span(&pf.content, &ve.instance_path);
541        let instance_path = if ve.instance_path.is_empty() {
542            DEFAULT_LABEL.to_string()
543        } else {
544            ve.instance_path.clone()
545        };
546        let label = format_label(&instance_path, &ve.schema_path);
547        let source_span: miette::SourceSpan = span.into();
548        errors.push(LintError::Validation {
549            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
550            span: source_span,
551            schema_span: source_span,
552            path: pf.path.clone(),
553            instance_path,
554            label,
555            message: ve.message.clone(),
556            schema_url: schema_url.to_string(),
557            schema_path: ve.schema_path.clone(),
558        });
559    }
560}
561
562/// Validate all files in a group against an already-compiled validator and store
563/// results in the validation cache.
564#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
565#[allow(clippy::too_many_arguments)]
566async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
567    validator: &jsonschema::Validator,
568    schema_uri: &str,
569    schema_hash: &str,
570    validate_formats: bool,
571    cache_status: Option<CacheStatus>,
572    group: &[P],
573    vcache: &validation_cache::ValidationCache,
574    errors: &mut Vec<LintError>,
575    checked: &mut Vec<CheckedFile>,
576    on_check: &mut impl FnMut(&CheckedFile),
577) {
578    for item in group {
579        let pf = item.borrow();
580        let file_errors: Vec<ValidationError> = validator
581            .iter_errors(&pf.instance)
582            .map(|error| ValidationError {
583                instance_path: error.instance_path().to_string(),
584                message: clean_error_message(error.to_string()),
585                schema_path: error.schema_path().to_string(),
586            })
587            .collect();
588
589        vcache
590            .store(&pf.content, schema_hash, validate_formats, &file_errors)
591            .await;
592        push_validation_errors(pf, schema_uri, &file_errors, errors);
593
594        let cf = CheckedFile {
595            path: pf.path.clone(),
596            schema: schema_uri.to_string(),
597            cache_status,
598            validation_cache_status: Some(ValidationCacheStatus::Miss),
599        };
600        on_check(&cf);
601        checked.push(cf);
602    }
603}
604
605// ---------------------------------------------------------------------------
606// Public API
607// ---------------------------------------------------------------------------
608
609/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
610///
611/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
612pub async fn fetch_compiled_catalogs<C: HttpClient>(
613    retriever: &SchemaCache<C>,
614    config: &config::Config,
615    no_catalog: bool,
616) -> Vec<CompiledCatalog> {
617    let mut compiled_catalogs = Vec::new();
618
619    if !no_catalog {
620        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
621
622        #[allow(clippy::items_after_statements)]
623        type CatalogResult = (
624            String,
625            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
626        );
627        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
628
629        // Lintel catalog
630        if !config.no_default_catalog {
631            let r = retriever.clone();
632            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
633            catalog_tasks.spawn(async move {
634                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
635                    .await
636                    .map(|cat| CompiledCatalog::compile(&cat));
637                (label, result)
638            });
639        }
640
641        // SchemaStore catalog
642        let r = retriever.clone();
643        catalog_tasks.spawn(async move {
644            let result = catalog::fetch_catalog(&r)
645                .await
646                .map(|cat| CompiledCatalog::compile(&cat));
647            ("SchemaStore catalog".to_string(), result)
648        });
649
650        // Additional registries from lintel.toml
651        for registry_url in &config.registries {
652            let r = retriever.clone();
653            let url = registry_url.clone();
654            let label = format!("registry {url}");
655            catalog_tasks.spawn(async move {
656                let result = registry::fetch(&r, &url)
657                    .await
658                    .map(|cat| CompiledCatalog::compile(&cat));
659                (label, result)
660            });
661        }
662
663        while let Some(result) = catalog_tasks.join_next().await {
664            match result {
665                Ok((_, Ok(compiled))) => compiled_catalogs.push(compiled),
666                Ok((label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
667                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
668            }
669        }
670
671        drop(catalog_span);
672    }
673
674    compiled_catalogs
675}
676
677/// # Errors
678///
679/// Returns an error if file collection or schema validation encounters an I/O error.
680pub async fn run<C: HttpClient>(args: &ValidateArgs, client: C) -> Result<ValidateResult> {
681    run_with(args, client, |_| {}).await
682}
683
684/// Like [`run`], but calls `on_check` each time a file is checked, allowing
685/// callers to stream progress (e.g. verbose output) as files are processed.
686///
687/// # Errors
688///
689/// Returns an error if file collection or schema validation encounters an I/O error.
690#[tracing::instrument(skip_all, name = "validate")]
691#[allow(clippy::too_many_lines)]
692pub async fn run_with<C: HttpClient>(
693    args: &ValidateArgs,
694    client: C,
695    mut on_check: impl FnMut(&CheckedFile),
696) -> Result<ValidateResult> {
697    let cache_dir = match &args.cache_dir {
698        Some(dir) => {
699            let path = PathBuf::from(dir);
700            let _ = fs::create_dir_all(&path);
701            path
702        }
703        None => ensure_cache_dir(),
704    };
705    let retriever = SchemaCache::new(
706        Some(cache_dir),
707        client.clone(),
708        args.force_schema_fetch,
709        args.schema_cache_ttl,
710    );
711
712    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
713    let files = collect_files(&args.globs, &args.exclude)?;
714    tracing::info!(file_count = files.len(), "collected files");
715
716    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
717
718    let mut errors: Vec<LintError> = Vec::new();
719    let mut checked: Vec<CheckedFile> = Vec::new();
720
721    // Validate lintel.toml against its own schema
722    if let Some(config_path) = config_path {
723        validate_config(&config_path, &mut errors, &mut checked, &mut on_check).await?;
724    }
725
726    // Phase 1: Parse files and resolve schema URIs
727    let schema_groups = parse_and_group_files(
728        &files,
729        &config,
730        &config_dir,
731        &compiled_catalogs,
732        &mut errors,
733    )
734    .await;
735    tracing::info!(
736        schema_count = schema_groups.len(),
737        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
738        "grouped files by schema"
739    );
740
741    // Create validation cache
742    let vcache = validation_cache::ValidationCache::new(
743        validation_cache::ensure_cache_dir(),
744        args.force_validation,
745    );
746
747    // Prefetch all remote schemas in parallel
748    let remote_uris: Vec<&String> = schema_groups
749        .keys()
750        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
751        .collect();
752
753    let prefetched = {
754        let _prefetch_span =
755            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
756
757        let mut schema_tasks = tokio::task::JoinSet::new();
758        for uri in remote_uris {
759            let r = retriever.clone();
760            let u = uri.clone();
761            schema_tasks.spawn(async move {
762                let result = r.fetch(&u).await;
763                (u, result)
764            });
765        }
766
767        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
768        while let Some(result) = schema_tasks.join_next().await {
769            match result {
770                Ok((uri, fetch_result)) => {
771                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
772                }
773                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
774            }
775        }
776
777        prefetched
778    };
779
780    // Phase 2: Compile each schema once and validate all matching files
781    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
782    let mut fetch_time = core::time::Duration::ZERO;
783    let mut hash_time = core::time::Duration::ZERO;
784    let mut vcache_time = core::time::Duration::ZERO;
785    let mut compile_time = core::time::Duration::ZERO;
786    let mut validate_time = core::time::Duration::ZERO;
787
788    for (schema_uri, group) in &schema_groups {
789        let _group_span = tracing::debug_span!(
790            "schema_group",
791            schema = schema_uri.as_str(),
792            files = group.len(),
793        )
794        .entered();
795
796        // If ANY file in the group matches a `validate_formats = false` override,
797        // disable format validation for the whole group (they share one compiled validator).
798        let validate_formats = group.iter().all(|pf| {
799            config
800                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
801        });
802
803        // Remote schemas were prefetched in parallel above; local schemas are
804        // read from disk here (with in-memory caching).
805        let t = std::time::Instant::now();
806        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
807            schema_uri,
808            &prefetched,
809            &mut local_schema_cache,
810            group,
811            &mut errors,
812            &mut checked,
813            &mut on_check,
814        )
815        .await
816        else {
817            fetch_time += t.elapsed();
818            continue;
819        };
820        fetch_time += t.elapsed();
821
822        // Pre-compute schema hash once for the entire group.
823        let t = std::time::Instant::now();
824        let schema_hash = validation_cache::schema_hash(&schema_value);
825        hash_time += t.elapsed();
826
827        // Split the group into validation cache hits and misses.
828        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
829
830        let t = std::time::Instant::now();
831        for pf in group {
832            let (cached, vcache_status) = vcache
833                .lookup(&pf.content, &schema_hash, validate_formats)
834                .await;
835
836            if let Some(cached_errors) = cached {
837                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
838                let cf = CheckedFile {
839                    path: pf.path.clone(),
840                    schema: schema_uri.clone(),
841                    cache_status,
842                    validation_cache_status: Some(vcache_status),
843                };
844                on_check(&cf);
845                checked.push(cf);
846            } else {
847                cache_misses.push(pf);
848            }
849        }
850        vcache_time += t.elapsed();
851
852        tracing::debug!(
853            cache_hits = group.len() - cache_misses.len(),
854            cache_misses = cache_misses.len(),
855            "validation cache"
856        );
857
858        // If all files hit the validation cache, skip schema compilation entirely.
859        if cache_misses.is_empty() {
860            continue;
861        }
862
863        // Compile the schema for cache misses.
864        let t = std::time::Instant::now();
865        let validator = {
866            match jsonschema::async_options()
867                .with_retriever(retriever.clone())
868                .should_validate_formats(validate_formats)
869                .build(&schema_value)
870                .await
871            {
872                Ok(v) => v,
873                Err(e) => {
874                    compile_time += t.elapsed();
875                    // When format validation is disabled and the compilation error
876                    // is a uri-reference issue (e.g. Rust-style $ref paths in
877                    // vector.json), skip validation silently.
878                    if !validate_formats && e.to_string().contains("uri-reference") {
879                        mark_group_checked(
880                            schema_uri,
881                            cache_status,
882                            Some(ValidationCacheStatus::Miss),
883                            &cache_misses,
884                            &mut checked,
885                            &mut on_check,
886                        );
887                        continue;
888                    }
889                    let msg = format!("failed to compile schema: {e}");
890                    report_group_error(
891                        |path| LintError::SchemaCompile {
892                            path: path.to_string(),
893                            message: msg.clone(),
894                        },
895                        schema_uri,
896                        cache_status,
897                        &cache_misses,
898                        &mut errors,
899                        &mut checked,
900                        &mut on_check,
901                    );
902                    continue;
903                }
904            }
905        };
906        compile_time += t.elapsed();
907
908        let t = std::time::Instant::now();
909        validate_group(
910            &validator,
911            schema_uri,
912            &schema_hash,
913            validate_formats,
914            cache_status,
915            &cache_misses,
916            &vcache,
917            &mut errors,
918            &mut checked,
919            &mut on_check,
920        )
921        .await;
922        validate_time += t.elapsed();
923    }
924
925    #[allow(clippy::cast_possible_truncation)]
926    {
927        tracing::info!(
928            fetch_ms = fetch_time.as_millis() as u64,
929            hash_ms = hash_time.as_millis() as u64,
930            vcache_ms = vcache_time.as_millis() as u64,
931            compile_ms = compile_time.as_millis() as u64,
932            validate_ms = validate_time.as_millis() as u64,
933            "phase2 breakdown"
934        );
935    }
936
937    // Sort errors for deterministic output (by path, then by span offset)
938    errors.sort_by(|a, b| {
939        a.path()
940            .cmp(b.path())
941            .then_with(|| a.offset().cmp(&b.offset()))
942    });
943
944    Ok(ValidateResult { errors, checked })
945}
946
947#[cfg(test)]
948mod tests {
949    use super::*;
950    use crate::retriever::HttpClient;
951    use core::error::Error;
952    use std::collections::HashMap;
953    use std::path::Path;
954
955    #[derive(Clone)]
956    struct MockClient(HashMap<String, String>);
957
958    #[async_trait::async_trait]
959    impl HttpClient for MockClient {
960        async fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
961            self.0
962                .get(uri)
963                .cloned()
964                .ok_or_else(|| format!("mock: no response for {uri}").into())
965        }
966    }
967
968    fn mock(entries: &[(&str, &str)]) -> MockClient {
969        MockClient(
970            entries
971                .iter()
972                .map(|(k, v)| (k.to_string(), v.to_string()))
973                .collect(),
974        )
975    }
976
977    fn testdata() -> PathBuf {
978        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
979    }
980
981    /// Build glob patterns that scan one or more testdata directories for all supported file types.
982    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
983        dirs.iter()
984            .flat_map(|dir| {
985                let base = testdata().join(dir);
986                vec![
987                    base.join("*.json").to_string_lossy().to_string(),
988                    base.join("*.yaml").to_string_lossy().to_string(),
989                    base.join("*.yml").to_string_lossy().to_string(),
990                    base.join("*.json5").to_string_lossy().to_string(),
991                    base.join("*.jsonc").to_string_lossy().to_string(),
992                    base.join("*.toml").to_string_lossy().to_string(),
993                ]
994            })
995            .collect()
996    }
997
998    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
999        ValidateArgs {
1000            globs: scenario_globs(dirs),
1001            exclude: vec![],
1002            cache_dir: None,
1003            force_schema_fetch: true,
1004            force_validation: true,
1005            no_catalog: true,
1006            config_dir: None,
1007            schema_cache_ttl: None,
1008        }
1009    }
1010
    /// Minimal JSON Schema shared by the tests: an object with a string `name`
    /// property that is required.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1013
1014    fn schema_mock() -> MockClient {
1015        mock(&[("https://example.com/schema.json", SCHEMA)])
1016    }
1017
1018    // --- Directory scanning tests ---
1019
1020    #[tokio::test]
1021    async fn no_matching_files() -> anyhow::Result<()> {
1022        let tmp = tempfile::tempdir()?;
1023        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1024        let c = ValidateArgs {
1025            globs: vec![pattern],
1026            exclude: vec![],
1027            cache_dir: None,
1028            force_schema_fetch: true,
1029            force_validation: true,
1030            no_catalog: true,
1031            config_dir: None,
1032            schema_cache_ttl: None,
1033        };
1034        let result = run(&c, mock(&[])).await?;
1035        assert!(!result.has_errors());
1036        Ok(())
1037    }
1038
1039    #[tokio::test]
1040    async fn dir_all_valid() -> anyhow::Result<()> {
1041        let c = args_for_dirs(&["positive_tests"]);
1042        let result = run(&c, schema_mock()).await?;
1043        assert!(!result.has_errors());
1044        Ok(())
1045    }
1046
1047    #[tokio::test]
1048    async fn dir_all_invalid() -> anyhow::Result<()> {
1049        let c = args_for_dirs(&["negative_tests"]);
1050        let result = run(&c, schema_mock()).await?;
1051        assert!(result.has_errors());
1052        Ok(())
1053    }
1054
1055    #[tokio::test]
1056    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1057        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1058        let result = run(&c, schema_mock()).await?;
1059        assert!(result.has_errors());
1060        Ok(())
1061    }
1062
1063    #[tokio::test]
1064    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1065        let c = args_for_dirs(&["no_schema"]);
1066        let result = run(&c, mock(&[])).await?;
1067        assert!(!result.has_errors());
1068        Ok(())
1069    }
1070
1071    #[tokio::test]
1072    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1073        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1074        let result = run(&c, schema_mock()).await?;
1075        assert!(!result.has_errors());
1076        Ok(())
1077    }
1078
1079    // --- Directory as positional arg ---
1080
1081    #[tokio::test]
1082    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1083        let dir = testdata().join("positive_tests");
1084        let c = ValidateArgs {
1085            globs: vec![dir.to_string_lossy().to_string()],
1086            exclude: vec![],
1087            cache_dir: None,
1088            force_schema_fetch: true,
1089            force_validation: true,
1090            no_catalog: true,
1091            config_dir: None,
1092            schema_cache_ttl: None,
1093        };
1094        let result = run(&c, schema_mock()).await?;
1095        assert!(!result.has_errors());
1096        assert!(result.files_checked() > 0);
1097        Ok(())
1098    }
1099
1100    #[tokio::test]
1101    async fn multiple_directory_args() -> anyhow::Result<()> {
1102        let pos_dir = testdata().join("positive_tests");
1103        let no_schema_dir = testdata().join("no_schema");
1104        let c = ValidateArgs {
1105            globs: vec![
1106                pos_dir.to_string_lossy().to_string(),
1107                no_schema_dir.to_string_lossy().to_string(),
1108            ],
1109            exclude: vec![],
1110            cache_dir: None,
1111            force_schema_fetch: true,
1112            force_validation: true,
1113            no_catalog: true,
1114            config_dir: None,
1115            schema_cache_ttl: None,
1116        };
1117        let result = run(&c, schema_mock()).await?;
1118        assert!(!result.has_errors());
1119        Ok(())
1120    }
1121
1122    #[tokio::test]
1123    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1124        let dir = testdata().join("positive_tests");
1125        let glob_pattern = testdata()
1126            .join("no_schema")
1127            .join("*.json")
1128            .to_string_lossy()
1129            .to_string();
1130        let c = ValidateArgs {
1131            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1132            exclude: vec![],
1133            cache_dir: None,
1134            force_schema_fetch: true,
1135            force_validation: true,
1136            no_catalog: true,
1137            config_dir: None,
1138            schema_cache_ttl: None,
1139        };
1140        let result = run(&c, schema_mock()).await?;
1141        assert!(!result.has_errors());
1142        Ok(())
1143    }
1144
1145    #[tokio::test]
1146    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1147        let base = testdata().join("malformed");
1148        let c = ValidateArgs {
1149            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1150            exclude: vec![],
1151            cache_dir: None,
1152            force_schema_fetch: true,
1153            force_validation: true,
1154            no_catalog: true,
1155            config_dir: None,
1156            schema_cache_ttl: None,
1157        };
1158        let result = run(&c, mock(&[])).await?;
1159        assert!(result.has_errors());
1160        Ok(())
1161    }
1162
1163    #[tokio::test]
1164    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1165        let base = testdata().join("malformed");
1166        let c = ValidateArgs {
1167            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1168            exclude: vec![],
1169            cache_dir: None,
1170            force_schema_fetch: true,
1171            force_validation: true,
1172            no_catalog: true,
1173            config_dir: None,
1174            schema_cache_ttl: None,
1175        };
1176        let result = run(&c, mock(&[])).await?;
1177        assert!(result.has_errors());
1178        Ok(())
1179    }
1180
1181    // --- Exclude filter ---
1182
1183    #[tokio::test]
1184    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1185        let base = testdata().join("negative_tests");
1186        let c = ValidateArgs {
1187            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1188            exclude: vec![
1189                base.join("missing_name.json").to_string_lossy().to_string(),
1190                base.join("missing_name.toml").to_string_lossy().to_string(),
1191                base.join("missing_name.yaml").to_string_lossy().to_string(),
1192            ],
1193            cache_dir: None,
1194            force_schema_fetch: true,
1195            force_validation: true,
1196            no_catalog: true,
1197            config_dir: None,
1198            schema_cache_ttl: None,
1199        };
1200        let result = run(&c, schema_mock()).await?;
1201        assert!(!result.has_errors());
1202        Ok(())
1203    }
1204
1205    // --- Cache options ---
1206
1207    #[tokio::test]
1208    async fn custom_cache_dir() -> anyhow::Result<()> {
1209        let cache_tmp = tempfile::tempdir()?;
1210        let c = ValidateArgs {
1211            globs: scenario_globs(&["positive_tests"]),
1212            exclude: vec![],
1213            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1214            force_schema_fetch: true,
1215            force_validation: true,
1216            no_catalog: true,
1217            config_dir: None,
1218            schema_cache_ttl: None,
1219        };
1220        let result = run(&c, schema_mock()).await?;
1221        assert!(!result.has_errors());
1222
1223        // Schema was fetched once and cached
1224        let entries: Vec<_> = fs::read_dir(cache_tmp.path())?.collect();
1225        assert_eq!(entries.len(), 1);
1226        Ok(())
1227    }
1228
1229    // --- Local schema ---
1230
1231    #[tokio::test]
1232    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1233        let tmp = tempfile::tempdir()?;
1234        let schema_path = tmp.path().join("schema.json");
1235        fs::write(&schema_path, SCHEMA)?;
1236
1237        let f = tmp.path().join("valid.json");
1238        fs::write(
1239            &f,
1240            format!(
1241                r#"{{"$schema":"{}","name":"hello"}}"#,
1242                schema_path.to_string_lossy()
1243            ),
1244        )?;
1245
1246        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1247        let c = ValidateArgs {
1248            globs: vec![pattern],
1249            exclude: vec![],
1250            cache_dir: None,
1251            force_schema_fetch: true,
1252            force_validation: true,
1253            no_catalog: true,
1254            config_dir: None,
1255            schema_cache_ttl: None,
1256        };
1257        let result = run(&c, mock(&[])).await?;
1258        assert!(!result.has_errors());
1259        Ok(())
1260    }
1261
1262    #[tokio::test]
1263    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1264        let tmp = tempfile::tempdir()?;
1265        let schema_path = tmp.path().join("schema.json");
1266        fs::write(&schema_path, SCHEMA)?;
1267
1268        let f = tmp.path().join("valid.yaml");
1269        fs::write(
1270            &f,
1271            format!(
1272                "# yaml-language-server: $schema={}\nname: hello\n",
1273                schema_path.to_string_lossy()
1274            ),
1275        )?;
1276
1277        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1278        let c = ValidateArgs {
1279            globs: vec![pattern],
1280            exclude: vec![],
1281            cache_dir: None,
1282            force_schema_fetch: true,
1283            force_validation: true,
1284            no_catalog: true,
1285            config_dir: None,
1286            schema_cache_ttl: None,
1287        };
1288        let result = run(&c, mock(&[])).await?;
1289        assert!(!result.has_errors());
1290        Ok(())
1291    }
1292
1293    #[tokio::test]
1294    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1295        let tmp = tempfile::tempdir()?;
1296        let f = tmp.path().join("ref.json");
1297        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1298
1299        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1300        let c = ValidateArgs {
1301            globs: vec![pattern],
1302            exclude: vec![],
1303            cache_dir: None,
1304            force_schema_fetch: true,
1305            force_validation: true,
1306            no_catalog: true,
1307            config_dir: None,
1308            schema_cache_ttl: None,
1309        };
1310        let result = run(&c, mock(&[])).await?;
1311        assert!(result.has_errors());
1312        Ok(())
1313    }
1314
1315    // --- JSON5 / JSONC tests ---
1316
1317    #[tokio::test]
1318    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1319        let tmp = tempfile::tempdir()?;
1320        let schema_path = tmp.path().join("schema.json");
1321        fs::write(&schema_path, SCHEMA)?;
1322
1323        let f = tmp.path().join("config.json5");
1324        fs::write(
1325            &f,
1326            format!(
1327                r#"{{
1328  // JSON5 comment
1329  "$schema": "{}",
1330  name: "hello",
1331}}"#,
1332                schema_path.to_string_lossy()
1333            ),
1334        )?;
1335
1336        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1337        let c = ValidateArgs {
1338            globs: vec![pattern],
1339            exclude: vec![],
1340            cache_dir: None,
1341            force_schema_fetch: true,
1342            force_validation: true,
1343            no_catalog: true,
1344            config_dir: None,
1345            schema_cache_ttl: None,
1346        };
1347        let result = run(&c, mock(&[])).await?;
1348        assert!(!result.has_errors());
1349        Ok(())
1350    }
1351
1352    #[tokio::test]
1353    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1354        let tmp = tempfile::tempdir()?;
1355        let schema_path = tmp.path().join("schema.json");
1356        fs::write(&schema_path, SCHEMA)?;
1357
1358        let f = tmp.path().join("config.jsonc");
1359        fs::write(
1360            &f,
1361            format!(
1362                r#"{{
1363  /* JSONC comment */
1364  "$schema": "{}",
1365  "name": "hello"
1366}}"#,
1367                schema_path.to_string_lossy()
1368            ),
1369        )?;
1370
1371        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1372        let c = ValidateArgs {
1373            globs: vec![pattern],
1374            exclude: vec![],
1375            cache_dir: None,
1376            force_schema_fetch: true,
1377            force_validation: true,
1378            no_catalog: true,
1379            config_dir: None,
1380            schema_cache_ttl: None,
1381        };
1382        let result = run(&c, mock(&[])).await?;
1383        assert!(!result.has_errors());
1384        Ok(())
1385    }
1386
1387    // --- Catalog-based schema matching ---
1388
    /// Small stand-in schema served at the GitHub workflow schema URL in the
    /// catalog tests: an object that requires `on` and `jobs`, with an optional
    /// string `name`.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1398
1399    fn gh_catalog_json() -> String {
1400        r#"{"schemas":[{
1401            "name": "GitHub Workflow",
1402            "url": "https://www.schemastore.org/github-workflow.json",
1403            "fileMatch": [
1404                "**/.github/workflows/*.yml",
1405                "**/.github/workflows/*.yaml"
1406            ]
1407        }]}"#
1408            .to_string()
1409    }
1410
1411    #[tokio::test]
1412    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1413        let tmp = tempfile::tempdir()?;
1414        let cache_tmp = tempfile::tempdir()?;
1415        let wf_dir = tmp.path().join(".github/workflows");
1416        fs::create_dir_all(&wf_dir)?;
1417        fs::write(
1418            wf_dir.join("ci.yml"),
1419            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1420        )?;
1421
1422        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1423        let client = mock(&[
1424            (
1425                "https://www.schemastore.org/api/json/catalog.json",
1426                &gh_catalog_json(),
1427            ),
1428            (
1429                "https://www.schemastore.org/github-workflow.json",
1430                GH_WORKFLOW_SCHEMA,
1431            ),
1432        ]);
1433        let c = ValidateArgs {
1434            globs: vec![pattern],
1435            exclude: vec![],
1436            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1437            force_schema_fetch: true,
1438            force_validation: true,
1439            no_catalog: false,
1440            config_dir: None,
1441            schema_cache_ttl: None,
1442        };
1443        let result = run(&c, client).await?;
1444        assert!(!result.has_errors());
1445        Ok(())
1446    }
1447
1448    #[tokio::test]
1449    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1450        let tmp = tempfile::tempdir()?;
1451        let cache_tmp = tempfile::tempdir()?;
1452        let wf_dir = tmp.path().join(".github/workflows");
1453        fs::create_dir_all(&wf_dir)?;
1454        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1455
1456        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1457        let client = mock(&[
1458            (
1459                "https://www.schemastore.org/api/json/catalog.json",
1460                &gh_catalog_json(),
1461            ),
1462            (
1463                "https://www.schemastore.org/github-workflow.json",
1464                GH_WORKFLOW_SCHEMA,
1465            ),
1466        ]);
1467        let c = ValidateArgs {
1468            globs: vec![pattern],
1469            exclude: vec![],
1470            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1471            force_schema_fetch: true,
1472            force_validation: true,
1473            no_catalog: false,
1474            config_dir: None,
1475            schema_cache_ttl: None,
1476        };
1477        let result = run(&c, client).await?;
1478        assert!(result.has_errors());
1479        Ok(())
1480    }
1481
1482    #[tokio::test]
1483    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1484        let tmp = tempfile::tempdir()?;
1485        let cache_tmp = tempfile::tempdir()?;
1486        let wf_dir = tmp.path().join(".github/workflows");
1487        fs::create_dir_all(&wf_dir)?;
1488        fs::write(
1489            wf_dir.join("ci.yml"),
1490            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1491        )?;
1492
1493        let client = mock(&[
1494            (
1495                "https://www.schemastore.org/api/json/catalog.json",
1496                &gh_catalog_json(),
1497            ),
1498            (
1499                "https://www.schemastore.org/github-workflow.json",
1500                GH_WORKFLOW_SCHEMA,
1501            ),
1502        ]);
1503        let c = ValidateArgs {
1504            globs: vec![],
1505            exclude: vec![],
1506            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1507            force_schema_fetch: true,
1508            force_validation: true,
1509            no_catalog: false,
1510            config_dir: None,
1511            schema_cache_ttl: None,
1512        };
1513
1514        let orig_dir = std::env::current_dir()?;
1515        std::env::set_current_dir(tmp.path())?;
1516        let result = run(&c, client).await?;
1517        std::env::set_current_dir(orig_dir)?;
1518
1519        assert!(!result.has_errors());
1520        Ok(())
1521    }
1522
1523    // --- TOML tests ---
1524
1525    #[tokio::test]
1526    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1527        let tmp = tempfile::tempdir()?;
1528        let schema_path = tmp.path().join("schema.json");
1529        fs::write(&schema_path, SCHEMA)?;
1530
1531        let f = tmp.path().join("config.toml");
1532        fs::write(
1533            &f,
1534            format!(
1535                "# :schema {}\nname = \"hello\"\n",
1536                schema_path.to_string_lossy()
1537            ),
1538        )?;
1539
1540        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1541        let c = ValidateArgs {
1542            globs: vec![pattern],
1543            exclude: vec![],
1544            cache_dir: None,
1545            force_schema_fetch: true,
1546            force_validation: true,
1547            no_catalog: true,
1548            config_dir: None,
1549            schema_cache_ttl: None,
1550        };
1551        let result = run(&c, mock(&[])).await?;
1552        assert!(!result.has_errors());
1553        Ok(())
1554    }
1555
1556    // --- Rewrite rules + // resolution ---
1557
1558    #[tokio::test]
1559    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1560        let tmp = tempfile::tempdir()?;
1561
1562        let schemas_dir = tmp.path().join("schemas");
1563        fs::create_dir_all(&schemas_dir)?;
1564        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1565
1566        fs::write(
1567            tmp.path().join("lintel.toml"),
1568            r#"
1569[rewrite]
1570"http://localhost:9000/" = "//schemas/"
1571"#,
1572        )?;
1573
1574        let f = tmp.path().join("config.json");
1575        fs::write(
1576            &f,
1577            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1578        )?;
1579
1580        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1581        let c = ValidateArgs {
1582            globs: vec![pattern],
1583            exclude: vec![],
1584            cache_dir: None,
1585            force_schema_fetch: true,
1586            force_validation: true,
1587            no_catalog: true,
1588            config_dir: Some(tmp.path().to_path_buf()),
1589            schema_cache_ttl: None,
1590        };
1591
1592        let result = run(&c, mock(&[])).await?;
1593        assert!(!result.has_errors());
1594        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1595        Ok(())
1596    }
1597
1598    #[tokio::test]
1599    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1600        let tmp = tempfile::tempdir()?;
1601
1602        let schemas_dir = tmp.path().join("schemas");
1603        fs::create_dir_all(&schemas_dir)?;
1604        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1605
1606        fs::write(tmp.path().join("lintel.toml"), "")?;
1607
1608        let sub = tmp.path().join("deeply/nested");
1609        fs::create_dir_all(&sub)?;
1610        let f = sub.join("config.json");
1611        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1612
1613        let pattern = sub.join("*.json").to_string_lossy().to_string();
1614        let c = ValidateArgs {
1615            globs: vec![pattern],
1616            exclude: vec![],
1617            cache_dir: None,
1618            force_schema_fetch: true,
1619            force_validation: true,
1620            no_catalog: true,
1621            config_dir: Some(tmp.path().to_path_buf()),
1622            schema_cache_ttl: None,
1623        };
1624
1625        let result = run(&c, mock(&[])).await?;
1626        assert!(!result.has_errors());
1627        Ok(())
1628    }
1629
1630    // --- Format validation override ---
1631
    /// Schema whose `link` property is a string constrained by
    /// `"format": "uri-reference"`; used by the format-validation tests.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1638
1639    #[tokio::test]
1640    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1641        let tmp = tempfile::tempdir()?;
1642        let schema_path = tmp.path().join("schema.json");
1643        fs::write(&schema_path, FORMAT_SCHEMA)?;
1644
1645        let f = tmp.path().join("data.json");
1646        fs::write(
1647            &f,
1648            format!(
1649                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1650                schema_path.to_string_lossy()
1651            ),
1652        )?;
1653
1654        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1655        let c = ValidateArgs {
1656            globs: vec![pattern],
1657            exclude: vec![],
1658            cache_dir: None,
1659            force_schema_fetch: true,
1660            force_validation: true,
1661            no_catalog: true,
1662            config_dir: Some(tmp.path().to_path_buf()),
1663            schema_cache_ttl: None,
1664        };
1665        let result = run(&c, mock(&[])).await?;
1666        assert!(
1667            result.has_errors(),
1668            "expected format error without override"
1669        );
1670        Ok(())
1671    }
1672
1673    #[tokio::test]
1674    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1675        let tmp = tempfile::tempdir()?;
1676        let schema_path = tmp.path().join("schema.json");
1677        fs::write(&schema_path, FORMAT_SCHEMA)?;
1678
1679        let f = tmp.path().join("data.json");
1680        fs::write(
1681            &f,
1682            format!(
1683                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1684                schema_path.to_string_lossy()
1685            ),
1686        )?;
1687
1688        // Use **/data.json to match the absolute path from the tempdir.
1689        fs::write(
1690            tmp.path().join("lintel.toml"),
1691            r#"
1692[[override]]
1693files = ["**/data.json"]
1694validate_formats = false
1695"#,
1696        )?;
1697
1698        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1699        let c = ValidateArgs {
1700            globs: vec![pattern],
1701            exclude: vec![],
1702            cache_dir: None,
1703            force_schema_fetch: true,
1704            force_validation: true,
1705            no_catalog: true,
1706            config_dir: Some(tmp.path().to_path_buf()),
1707            schema_cache_ttl: None,
1708        };
1709        let result = run(&c, mock(&[])).await?;
1710        assert!(
1711            !result.has_errors(),
1712            "expected no errors with validate_formats = false override"
1713        );
1714        Ok(())
1715    }
1716
1717    // --- Unrecognized extension handling ---
1718
1719    #[tokio::test]
1720    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1721        let tmp = tempfile::tempdir()?;
1722        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1723
1724        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1725        let c = ValidateArgs {
1726            globs: vec![pattern],
1727            exclude: vec![],
1728            cache_dir: None,
1729            force_schema_fetch: true,
1730            force_validation: true,
1731            no_catalog: true,
1732            config_dir: Some(tmp.path().to_path_buf()),
1733            schema_cache_ttl: None,
1734        };
1735        let result = run(&c, mock(&[])).await?;
1736        assert!(!result.has_errors());
1737        assert_eq!(result.files_checked(), 0);
1738        Ok(())
1739    }
1740
1741    #[tokio::test]
1742    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1743        let tmp = tempfile::tempdir()?;
1744        let cache_tmp = tempfile::tempdir()?;
1745        // File has .cfg extension (unrecognized) but content is valid JSON
1746        fs::write(
1747            tmp.path().join("myapp.cfg"),
1748            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1749        )?;
1750
1751        let catalog_json = r#"{"schemas":[{
1752            "name": "MyApp Config",
1753            "url": "https://example.com/myapp.schema.json",
1754            "fileMatch": ["*.cfg"]
1755        }]}"#;
1756        let schema =
1757            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1758
1759        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1760        let client = mock(&[
1761            (
1762                "https://www.schemastore.org/api/json/catalog.json",
1763                catalog_json,
1764            ),
1765            ("https://example.com/myapp.schema.json", schema),
1766        ]);
1767        let c = ValidateArgs {
1768            globs: vec![pattern],
1769            exclude: vec![],
1770            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1771            force_schema_fetch: true,
1772            force_validation: true,
1773            no_catalog: false,
1774            config_dir: Some(tmp.path().to_path_buf()),
1775            schema_cache_ttl: None,
1776        };
1777        let result = run(&c, client).await?;
1778        assert!(!result.has_errors());
1779        assert_eq!(result.files_checked(), 1);
1780        Ok(())
1781    }
1782
1783    #[tokio::test]
1784    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1785        let tmp = tempfile::tempdir()?;
1786        let cache_tmp = tempfile::tempdir()?;
1787        // File matches catalog but content isn't parseable by any format
1788        fs::write(
1789            tmp.path().join("myapp.cfg"),
1790            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1791        )?;
1792
1793        let catalog_json = r#"{"schemas":[{
1794            "name": "MyApp Config",
1795            "url": "https://example.com/myapp.schema.json",
1796            "fileMatch": ["*.cfg"]
1797        }]}"#;
1798
1799        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1800        let client = mock(&[(
1801            "https://www.schemastore.org/api/json/catalog.json",
1802            catalog_json,
1803        )]);
1804        let c = ValidateArgs {
1805            globs: vec![pattern],
1806            exclude: vec![],
1807            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1808            force_schema_fetch: true,
1809            force_validation: true,
1810            no_catalog: false,
1811            config_dir: Some(tmp.path().to_path_buf()),
1812            schema_cache_ttl: None,
1813        };
1814        let result = run(&c, client).await?;
1815        assert!(!result.has_errors());
1816        assert_eq!(result.files_checked(), 0);
1817        Ok(())
1818    }
1819
1820    #[tokio::test]
1821    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1822        let tmp = tempfile::tempdir()?;
1823        let cache_tmp = tempfile::tempdir()?;
1824        // File has .cfg extension, content is valid JSON but fails schema validation
1825        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1826
1827        let catalog_json = r#"{"schemas":[{
1828            "name": "MyApp Config",
1829            "url": "https://example.com/myapp.schema.json",
1830            "fileMatch": ["*.cfg"]
1831        }]}"#;
1832        let schema =
1833            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1834
1835        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1836        let client = mock(&[
1837            (
1838                "https://www.schemastore.org/api/json/catalog.json",
1839                catalog_json,
1840            ),
1841            ("https://example.com/myapp.schema.json", schema),
1842        ]);
1843        let c = ValidateArgs {
1844            globs: vec![pattern],
1845            exclude: vec![],
1846            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1847            force_schema_fetch: true,
1848            force_validation: true,
1849            no_catalog: false,
1850            config_dir: Some(tmp.path().to_path_buf()),
1851            schema_cache_ttl: None,
1852        };
1853        let result = run(&c, client).await?;
1854        assert!(result.has_errors());
1855        assert_eq!(result.files_checked(), 1);
1856        Ok(())
1857    }
1858
1859    // --- Validation cache ---
1860
1861    #[tokio::test]
1862    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1863        let tmp = tempfile::tempdir()?;
1864        let schema_path = tmp.path().join("schema.json");
1865        fs::write(&schema_path, SCHEMA)?;
1866
1867        let f = tmp.path().join("valid.json");
1868        fs::write(
1869            &f,
1870            format!(
1871                r#"{{"$schema":"{}","name":"hello"}}"#,
1872                schema_path.to_string_lossy()
1873            ),
1874        )?;
1875
1876        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1877
1878        // First run: force_validation = false so results get cached
1879        let c = ValidateArgs {
1880            globs: vec![pattern.clone()],
1881            exclude: vec![],
1882            cache_dir: None,
1883            force_schema_fetch: true,
1884            force_validation: false,
1885            no_catalog: true,
1886            config_dir: None,
1887            schema_cache_ttl: None,
1888        };
1889        let mut first_statuses = Vec::new();
1890        let result = run_with(&c, mock(&[]), |cf| {
1891            first_statuses.push(cf.validation_cache_status);
1892        })
1893        .await?;
1894        assert!(!result.has_errors());
1895        assert!(result.files_checked() > 0);
1896
1897        // Verify the first run recorded a validation cache miss
1898        assert!(
1899            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
1900            "expected at least one validation cache miss on first run"
1901        );
1902
1903        // Second run: same file, same schema — should hit validation cache
1904        let mut second_statuses = Vec::new();
1905        let result = run_with(&c, mock(&[]), |cf| {
1906            second_statuses.push(cf.validation_cache_status);
1907        })
1908        .await?;
1909        assert!(!result.has_errors());
1910
1911        // Verify the second run got a validation cache hit
1912        assert!(
1913            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
1914            "expected at least one validation cache hit on second run"
1915        );
1916        Ok(())
1917    }
1918
1919    // --- clean_error_message ---
1920
1921    #[test]
1922    fn clean_strips_anyof_value() {
1923        let msg =
1924            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
1925        assert_eq!(
1926            clean_error_message(msg.to_string()),
1927            "not valid under any of the schemas listed in the 'anyOf' keyword"
1928        );
1929    }
1930
1931    #[test]
1932    fn clean_strips_oneof_value() {
1933        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
1934        assert_eq!(
1935            clean_error_message(msg.to_string()),
1936            "not valid under any of the schemas listed in the 'oneOf' keyword"
1937        );
1938    }
1939
1940    #[test]
1941    fn clean_strips_long_value() {
1942        let long_value = "x".repeat(5000);
1943        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
1944        let msg = format!("{long_value}{suffix}");
1945        assert_eq!(
1946            clean_error_message(msg),
1947            "not valid under any of the schemas listed in the 'anyOf' keyword"
1948        );
1949    }
1950
1951    #[test]
1952    fn clean_preserves_type_error() {
1953        let msg = r#"12345 is not of types "null", "string""#;
1954        assert_eq!(clean_error_message(msg.to_string()), msg);
1955    }
1956
1957    #[test]
1958    fn clean_preserves_required_property() {
1959        let msg = "\"name\" is a required property";
1960        assert_eq!(clean_error_message(msg.to_string()), msg);
1961    }
1962}