Skip to main content

lintel_check/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog::{self, CompiledCatalog};
11use crate::config;
12use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
13use crate::discover;
14use crate::parsers::{self, FileFormat, JsoncParser, Parser};
15use crate::registry;
16use crate::retriever::{CacheStatus, SchemaCache};
17use crate::validation_cache::{self, ValidationCacheStatus, ValidationError};
18
/// Conservative limit for concurrent file reads to avoid exhausting file
/// descriptors. 128 is well below the default soft limit on macOS (256) and
/// Linux (1024) while still providing good throughput.
///
/// Used as the permit count of the semaphore that gates file reads in
/// `parse_and_group_files`.
const FD_CONCURRENCY_LIMIT: usize = 128;
23
/// Options controlling a validation run started via [`run`] or [`run_with`].
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover)
    ///
    /// An entry that names an existing directory is walked instead of being
    /// expanded as a glob.
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    ///
    /// When set, no catalogs at all are fetched (see
    /// [`fetch_compiled_catalogs`]).
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
49
50/// Re-exported from [`crate::diagnostics::LintError`] for backwards
51/// compatibility with existing `use lintel_check::validate::LintError` paths.
52pub use crate::diagnostics::LintError;
53
/// A file that was checked and the schema it resolved to.
pub struct CheckedFile {
    /// Display form of the checked file's path.
    pub path: String,
    /// Schema the file was checked against: the resolved schema URI, or the
    /// literal `"(builtin)"` for `lintel.toml` self-validation.
    pub schema: String,
    /// `None` for local schemas and builtins; `Some` for remote schemas.
    pub cache_status: Option<CacheStatus>,
    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
63
/// Result of a validation run.
pub struct ValidateResult {
    /// Every diagnostic collected during the run (I/O, parse, schema fetch,
    /// config, and validation errors).
    pub errors: Vec<LintError>,
    /// One entry per file that was checked against a schema, whether or not
    /// it produced errors.
    pub checked: Vec<CheckedFile>,
}
69
70impl ValidateResult {
71    pub fn has_errors(&self) -> bool {
72        !self.errors.is_empty()
73    }
74
75    pub fn files_checked(&self) -> usize {
76        self.checked.len()
77    }
78}
79
80// ---------------------------------------------------------------------------
81// Internal types
82// ---------------------------------------------------------------------------
83
/// A file that has been parsed and matched to a schema URI.
struct ParsedFile {
    /// Display form of the file's path; used in diagnostics and reports.
    path: String,
    /// Raw file text, kept for source spans and as part of the validation
    /// cache key.
    content: String,
    /// The parsed document as a JSON value, ready for schema validation.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
92
93// ---------------------------------------------------------------------------
94// Config loading
95// ---------------------------------------------------------------------------
96
97/// Locate `lintel.toml`, load the full config, and return the config directory.
98/// Returns `(config, config_dir, config_path)`.  When no config is found or
99/// cwd is unavailable the config is default and `config_path` is `None`.
100#[tracing::instrument(skip_all)]
101pub fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
102    let start_dir = match search_dir {
103        Some(d) => d.to_path_buf(),
104        None => match std::env::current_dir() {
105            Ok(d) => d,
106            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
107        },
108    };
109
110    let Some(config_path) = config::find_config_path(&start_dir) else {
111        return (config::Config::default(), start_dir, None);
112    };
113
114    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
115    let cfg = config::find_and_load(&start_dir)
116        .ok()
117        .flatten()
118        .unwrap_or_default();
119    (cfg, dir, Some(config_path))
120}
121
122// ---------------------------------------------------------------------------
123// File collection
124// ---------------------------------------------------------------------------
125
126/// Collect input files from globs/directories, applying exclude filters.
127///
128/// # Errors
129///
130/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
131#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
132pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
133    if globs.is_empty() {
134        return discover::discover_files(".", exclude);
135    }
136
137    let mut result = Vec::new();
138    for pattern in globs {
139        let path = Path::new(pattern);
140        if path.is_dir() {
141            result.extend(discover::discover_files(pattern, exclude)?);
142        } else {
143            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
144                let path = entry?;
145                if path.is_file() && !is_excluded(&path, exclude) {
146                    result.push(path);
147                }
148            }
149        }
150    }
151    Ok(result)
152}
153
154fn is_excluded(path: &Path, excludes: &[String]) -> bool {
155    let path_str = match path.to_str() {
156        Some(s) => s.strip_prefix("./").unwrap_or(s),
157        None => return false,
158    };
159    excludes
160        .iter()
161        .any(|pattern| glob_match::glob_match(pattern, path_str))
162}
163
164// ---------------------------------------------------------------------------
165// lintel.toml self-validation
166// ---------------------------------------------------------------------------
167
168/// Validate `lintel.toml` against its built-in schema.
169async fn validate_config(
170    config_path: &Path,
171    errors: &mut Vec<LintError>,
172    checked: &mut Vec<CheckedFile>,
173    on_check: &mut impl FnMut(&CheckedFile),
174) -> Result<()> {
175    let content = tokio::fs::read_to_string(config_path).await?;
176    let config_value: Value = toml::from_str(&content)
177        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
178    let schema_value: Value = serde_json::from_str(include_str!(concat!(
179        env!("OUT_DIR"),
180        "/lintel-config.schema.json"
181    )))
182    .context("failed to parse embedded lintel config schema")?;
183    if let Ok(validator) = jsonschema::options().build(&schema_value) {
184        let path_str = config_path.display().to_string();
185        for error in validator.iter_errors(&config_value) {
186            let ip = error.instance_path().to_string();
187            let span = find_instance_path_span(&content, &ip);
188            errors.push(LintError::Config {
189                src: miette::NamedSource::new(&path_str, content.clone()),
190                span: span.into(),
191                path: path_str.clone(),
192                instance_path: if ip.is_empty() {
193                    DEFAULT_LABEL.to_string()
194                } else {
195                    ip
196                },
197                message: clean_error_message(error.to_string()),
198            });
199        }
200        let cf = CheckedFile {
201            path: path_str,
202            schema: "(builtin)".to_string(),
203            cache_status: None,
204            validation_cache_status: None,
205        };
206        on_check(&cf);
207        checked.push(cf);
208    }
209    Ok(())
210}
211
212// ---------------------------------------------------------------------------
213// Phase 1: Parse files and resolve schema URIs
214// ---------------------------------------------------------------------------
215
216/// Try parsing content with each known format, returning the first success.
217///
218/// JSONC is tried first (superset of JSON, handles comments), then YAML and
219/// TOML which cover the most common config formats, followed by the rest.
220pub fn try_parse_all(content: &str, file_name: &str) -> Option<(parsers::FileFormat, Value)> {
221    use parsers::FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
222    const FORMATS: [parsers::FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
223
224    for fmt in FORMATS {
225        let parser = parsers::parser_for(fmt);
226        if let Ok(val) = parser.parse(content, file_name) {
227            return Some((fmt, val));
228        }
229    }
230    None
231}
232
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// The file parsed and a schema was found for it.
    Parsed {
        /// Fully resolved schema URI (after rewrites and path resolution);
        /// used as the grouping key.
        schema_uri: String,
        /// The parsed file and its original (pre-rewrite) schema URI.
        parsed: ParsedFile,
    },
    /// The file could not be parsed in its detected format.
    Error(LintError),
    /// The file is not subject to validation: unrecognized format with no
    /// schema match, unparseable by any format, a null document, or no schema
    /// could be resolved.
    Skip,
}
243
244/// Process a single file's already-read content: parse and resolve schema URI.
245#[allow(clippy::too_many_arguments)]
246fn process_one_file(
247    path: &Path,
248    content: String,
249    config: &config::Config,
250    config_dir: &Path,
251    compiled_catalogs: &[CompiledCatalog],
252) -> FileResult {
253    let path_str = path.display().to_string();
254    let file_name = path
255        .file_name()
256        .and_then(|n| n.to_str())
257        .unwrap_or(&path_str);
258
259    let detected_format = parsers::detect_format(path);
260
261    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
262    if detected_format.is_none() {
263        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
264            || compiled_catalogs
265                .iter()
266                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
267        if !has_match {
268            return FileResult::Skip;
269        }
270    }
271
272    // Parse the file content.
273    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
274        let parser = parsers::parser_for(fmt);
275        match parser.parse(&content, &path_str) {
276            Ok(val) => (parser, val),
277            Err(parse_err) => {
278                // JSONC fallback for .json files that match a catalog entry.
279                if fmt == FileFormat::Json
280                    && compiled_catalogs
281                        .iter()
282                        .any(|cat| cat.find_schema(&path_str, file_name).is_some())
283                {
284                    match JsoncParser.parse(&content, &path_str) {
285                        Ok(val) => (parsers::parser_for(FileFormat::Jsonc), val),
286                        Err(jsonc_err) => return FileResult::Error(jsonc_err.into()),
287                    }
288                } else {
289                    return FileResult::Error(parse_err.into());
290                }
291            }
292        }
293    } else {
294        match try_parse_all(&content, &path_str) {
295            Some((fmt, val)) => (parsers::parser_for(fmt), val),
296            None => return FileResult::Skip,
297        }
298    };
299
300    // Skip markdown files with no frontmatter
301    if instance.is_null() {
302        return FileResult::Skip;
303    }
304
305    // Schema resolution priority:
306    // 1. Inline $schema / YAML modeline (always wins)
307    // 2. Custom schema mappings from lintel.toml [schemas]
308    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
309    let schema_uri = parser
310        .extract_schema_uri(&content, &instance)
311        .or_else(|| {
312            config
313                .find_schema_mapping(&path_str, file_name)
314                .map(str::to_string)
315        })
316        .or_else(|| {
317            compiled_catalogs
318                .iter()
319                .find_map(|cat| cat.find_schema(&path_str, file_name))
320                .map(str::to_string)
321        });
322
323    let Some(schema_uri) = schema_uri else {
324        return FileResult::Skip;
325    };
326
327    // Keep original URI for override matching (before rewrites)
328    let original_schema_uri = schema_uri.clone();
329
330    // Apply rewrite rules, then resolve // paths relative to lintel.toml
331    let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
332    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
333
334    // Resolve relative local paths against the file's parent directory.
335    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
336    let schema_uri = if is_remote {
337        schema_uri
338    } else {
339        path.parent()
340            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
341            .unwrap_or(schema_uri)
342    };
343
344    FileResult::Parsed {
345        schema_uri,
346        parsed: ParsedFile {
347            path: path_str,
348            content,
349            instance,
350            original_schema_uri,
351        },
352    }
353}
354
355/// Read each file concurrently with tokio, parse its content, extract its
356/// schema URI, apply rewrites, and group by resolved schema URI.
357#[tracing::instrument(skip_all, fields(file_count = files.len()))]
358#[allow(clippy::too_many_arguments)]
359async fn parse_and_group_files(
360    files: &[PathBuf],
361    config: &config::Config,
362    config_dir: &Path,
363    compiled_catalogs: &[CompiledCatalog],
364    errors: &mut Vec<LintError>,
365) -> BTreeMap<String, Vec<ParsedFile>> {
366    // Read all files concurrently using tokio async I/O, with a semaphore
367    // to avoid exhausting file descriptors on large directories.
368    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
369    let mut read_set = tokio::task::JoinSet::new();
370    for path in files {
371        let path = path.clone();
372        let sem = semaphore.clone();
373        read_set.spawn(async move {
374            let _permit = sem.acquire().await.expect("semaphore closed");
375            let result = tokio::fs::read_to_string(&path).await;
376            (path, result)
377        });
378    }
379
380    let mut file_contents = Vec::with_capacity(files.len());
381    while let Some(result) = read_set.join_next().await {
382        match result {
383            Ok(item) => file_contents.push(item),
384            Err(e) => tracing::warn!("file read task panicked: {e}"),
385        }
386    }
387
388    // Process files: parse content and resolve schema URIs.
389    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
390    for (path, content_result) in file_contents {
391        let content = match content_result {
392            Ok(c) => c,
393            Err(e) => {
394                errors.push(LintError::Io {
395                    path: path.display().to_string(),
396                    message: format!("failed to read: {e}"),
397                });
398                continue;
399            }
400        };
401        let result = process_one_file(&path, content, config, config_dir, compiled_catalogs);
402        match result {
403            FileResult::Parsed { schema_uri, parsed } => {
404                schema_groups.entry(schema_uri).or_default().push(parsed);
405            }
406            FileResult::Error(e) => errors.push(e),
407            FileResult::Skip => {}
408        }
409    }
410
411    schema_groups
412}
413
414// ---------------------------------------------------------------------------
415// Phase 2: Schema fetching, compilation, and instance validation
416// ---------------------------------------------------------------------------
417
418/// Fetch a schema by URI, returning its parsed JSON and cache status.
419///
420/// For remote URIs, checks the prefetched map first; for local URIs, reads
421/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
422#[allow(clippy::too_many_arguments)]
423async fn fetch_schema_from_prefetched(
424    schema_uri: &str,
425    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
426    local_cache: &mut HashMap<String, Value>,
427    group: &[ParsedFile],
428    errors: &mut Vec<LintError>,
429    checked: &mut Vec<CheckedFile>,
430    on_check: &mut impl FnMut(&CheckedFile),
431) -> Option<(Value, Option<CacheStatus>)> {
432    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
433
434    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
435        match prefetched.get(schema_uri) {
436            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
437            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
438            None => Err(format!("schema not prefetched: {schema_uri}")),
439        }
440    } else if let Some(cached) = local_cache.get(schema_uri) {
441        Ok((cached.clone(), None))
442    } else {
443        tokio::fs::read_to_string(schema_uri)
444            .await
445            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
446            .and_then(|content| {
447                serde_json::from_str::<Value>(&content)
448                    .map(|v| {
449                        local_cache.insert(schema_uri.to_string(), v.clone());
450                        (v, None)
451                    })
452                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
453            })
454    };
455
456    match result {
457        Ok(value) => Some(value),
458        Err(message) => {
459            report_group_error(
460                |path| LintError::SchemaFetch {
461                    path: path.to_string(),
462                    message: message.clone(),
463                },
464                schema_uri,
465                None,
466                group,
467                errors,
468                checked,
469                on_check,
470            );
471            None
472        }
473    }
474}
475
476/// Report the same error for every file in a schema group.
477#[allow(clippy::too_many_arguments)]
478fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
479    make_error: impl Fn(&str) -> LintError,
480    schema_uri: &str,
481    cache_status: Option<CacheStatus>,
482    group: &[P],
483    errors: &mut Vec<LintError>,
484    checked: &mut Vec<CheckedFile>,
485    on_check: &mut impl FnMut(&CheckedFile),
486) {
487    for item in group {
488        let pf = item.borrow();
489        let cf = CheckedFile {
490            path: pf.path.clone(),
491            schema: schema_uri.to_string(),
492            cache_status,
493            validation_cache_status: None,
494        };
495        on_check(&cf);
496        checked.push(cf);
497        errors.push(make_error(&pf.path));
498    }
499}
500
501/// Mark every file in a group as checked (no errors).
502#[allow(clippy::too_many_arguments)]
503fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
504    schema_uri: &str,
505    cache_status: Option<CacheStatus>,
506    validation_cache_status: Option<ValidationCacheStatus>,
507    group: &[P],
508    checked: &mut Vec<CheckedFile>,
509    on_check: &mut impl FnMut(&CheckedFile),
510) {
511    for item in group {
512        let pf = item.borrow();
513        let cf = CheckedFile {
514            path: pf.path.clone(),
515            schema: schema_uri.to_string(),
516            cache_status,
517            validation_cache_status,
518        };
519        on_check(&cf);
520        checked.push(cf);
521    }
522}
523
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// The marker is searched from the end of the message: the dumped value comes
/// first and may itself contain the marker text, while the real marker is
/// always the last occurrence.
///
/// All other messages are returned unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // The marker starts with " is "; skipping it leaves "not valid...".
    const SKIP: usize = " is ".len();

    match msg.rfind(MARKER) {
        Some(pos) => msg[pos + SKIP..].to_string(),
        None => msg,
    }
}
540
541/// Convert [`ValidationError`]s into [`LintError::Validation`] diagnostics.
542fn push_validation_errors(
543    pf: &ParsedFile,
544    schema_url: &str,
545    validation_errors: &[ValidationError],
546    errors: &mut Vec<LintError>,
547) {
548    for ve in validation_errors {
549        let span = find_instance_path_span(&pf.content, &ve.instance_path);
550        let instance_path = if ve.instance_path.is_empty() {
551            DEFAULT_LABEL.to_string()
552        } else {
553            ve.instance_path.clone()
554        };
555        let label = format_label(&instance_path, &ve.schema_path);
556        let source_span: miette::SourceSpan = span.into();
557        errors.push(LintError::Validation {
558            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
559            span: source_span,
560            schema_span: source_span,
561            path: pf.path.clone(),
562            instance_path,
563            label,
564            message: ve.message.clone(),
565            schema_url: schema_url.to_string(),
566            schema_path: ve.schema_path.clone(),
567        });
568    }
569}
570
571/// Validate all files in a group against an already-compiled validator and store
572/// results in the validation cache.
573#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
574#[allow(clippy::too_many_arguments)]
575async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
576    validator: &jsonschema::Validator,
577    schema_uri: &str,
578    schema_hash: &str,
579    validate_formats: bool,
580    cache_status: Option<CacheStatus>,
581    group: &[P],
582    vcache: &validation_cache::ValidationCache,
583    errors: &mut Vec<LintError>,
584    checked: &mut Vec<CheckedFile>,
585    on_check: &mut impl FnMut(&CheckedFile),
586) {
587    for item in group {
588        let pf = item.borrow();
589        let file_errors: Vec<ValidationError> = validator
590            .iter_errors(&pf.instance)
591            .map(|error| ValidationError {
592                instance_path: error.instance_path().to_string(),
593                message: clean_error_message(error.to_string()),
594                schema_path: error.schema_path().to_string(),
595            })
596            .collect();
597
598        vcache
599            .store(
600                &validation_cache::CacheKey {
601                    file_content: &pf.content,
602                    schema_hash,
603                    validate_formats,
604                },
605                &file_errors,
606            )
607            .await;
608        push_validation_errors(pf, schema_uri, &file_errors, errors);
609
610        let cf = CheckedFile {
611            path: pf.path.clone(),
612            schema: schema_uri.to_string(),
613            cache_status,
614            validation_cache_status: Some(ValidationCacheStatus::Miss),
615        };
616        on_check(&cf);
617        checked.push(cf);
618    }
619}
620
621// ---------------------------------------------------------------------------
622// Public API
623// ---------------------------------------------------------------------------
624
625/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
626///
627/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
628pub async fn fetch_compiled_catalogs(
629    retriever: &SchemaCache,
630    config: &config::Config,
631    no_catalog: bool,
632) -> Vec<CompiledCatalog> {
633    let mut compiled_catalogs = Vec::new();
634
635    if !no_catalog {
636        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
637
638        // Catalogs are fetched concurrently but sorted by priority so that
639        // the Lintel catalog wins over custom registries, which win over
640        // SchemaStore.  The `order` field encodes this precedence.
641        #[allow(clippy::items_after_statements)]
642        type CatalogResult = (
643            usize, // priority (lower = higher precedence)
644            String,
645            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
646        );
647        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
648
649        // Custom registries from lintel.toml (highest precedence among catalogs)
650        for (i, registry_url) in config.registries.iter().enumerate() {
651            let r = retriever.clone();
652            let url = registry_url.clone();
653            let label = format!("registry {url}");
654            catalog_tasks.spawn(async move {
655                let result = registry::fetch(&r, &url)
656                    .await
657                    .map(|cat| CompiledCatalog::compile(&cat));
658                (i, label, result)
659            });
660        }
661
662        // Lintel catalog
663        let lintel_order = config.registries.len();
664        if !config.no_default_catalog {
665            let r = retriever.clone();
666            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
667            catalog_tasks.spawn(async move {
668                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
669                    .await
670                    .map(|cat| CompiledCatalog::compile(&cat));
671                (lintel_order, label, result)
672            });
673        }
674
675        // SchemaStore catalog (lowest precedence)
676        let schemastore_order = config.registries.len() + 1;
677        let r = retriever.clone();
678        catalog_tasks.spawn(async move {
679            let result = catalog::fetch_catalog(&r)
680                .await
681                .map(|cat| CompiledCatalog::compile(&cat));
682            (schemastore_order, "SchemaStore catalog".to_string(), result)
683        });
684
685        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
686        while let Some(result) = catalog_tasks.join_next().await {
687            match result {
688                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
689                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
690                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
691            }
692        }
693        results.sort_by_key(|(order, _)| *order);
694        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));
695
696        drop(catalog_span);
697    }
698
699    compiled_catalogs
700}
701
702/// # Errors
703///
704/// Returns an error if file collection or schema validation encounters an I/O error.
705pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
706    run_with(args, None, |_| {}).await
707}
708
709/// Like [`run`], but calls `on_check` each time a file is checked, allowing
710/// callers to stream progress (e.g. verbose output) as files are processed.
711///
712/// # Errors
713///
714/// Returns an error if file collection or schema validation encounters an I/O error.
715#[tracing::instrument(skip_all, name = "validate")]
716#[allow(clippy::too_many_lines)]
717pub async fn run_with(
718    args: &ValidateArgs,
719    cache: Option<SchemaCache>,
720    mut on_check: impl FnMut(&CheckedFile),
721) -> Result<ValidateResult> {
722    let retriever = if let Some(c) = cache {
723        c
724    } else {
725        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
726        if let Some(dir) = &args.cache_dir {
727            let path = PathBuf::from(dir);
728            let _ = fs::create_dir_all(&path);
729            builder = builder.cache_dir(path);
730        }
731        if let Some(ttl) = args.schema_cache_ttl {
732            builder = builder.ttl(ttl);
733        }
734        builder.build()
735    };
736
737    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
738    let files = collect_files(&args.globs, &args.exclude)?;
739    tracing::info!(file_count = files.len(), "collected files");
740
741    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
742
743    let mut errors: Vec<LintError> = Vec::new();
744    let mut checked: Vec<CheckedFile> = Vec::new();
745
746    // Validate lintel.toml against its own schema
747    if let Some(config_path) = config_path {
748        validate_config(&config_path, &mut errors, &mut checked, &mut on_check).await?;
749    }
750
751    // Phase 1: Parse files and resolve schema URIs
752    let schema_groups = parse_and_group_files(
753        &files,
754        &config,
755        &config_dir,
756        &compiled_catalogs,
757        &mut errors,
758    )
759    .await;
760    tracing::info!(
761        schema_count = schema_groups.len(),
762        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
763        "grouped files by schema"
764    );
765
766    // Create validation cache
767    let vcache = validation_cache::ValidationCache::new(
768        validation_cache::ensure_cache_dir(),
769        args.force_validation,
770    );
771
772    // Prefetch all remote schemas in parallel
773    let remote_uris: Vec<&String> = schema_groups
774        .keys()
775        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
776        .collect();
777
778    let prefetched = {
779        let _prefetch_span =
780            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
781
782        let mut schema_tasks = tokio::task::JoinSet::new();
783        for uri in remote_uris {
784            let r = retriever.clone();
785            let u = uri.clone();
786            schema_tasks.spawn(async move {
787                let result = r.fetch(&u).await;
788                (u, result)
789            });
790        }
791
792        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
793        while let Some(result) = schema_tasks.join_next().await {
794            match result {
795                Ok((uri, fetch_result)) => {
796                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
797                }
798                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
799            }
800        }
801
802        prefetched
803    };
804
805    // Phase 2: Compile each schema once and validate all matching files
806    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
807    let mut fetch_time = core::time::Duration::ZERO;
808    let mut hash_time = core::time::Duration::ZERO;
809    let mut vcache_time = core::time::Duration::ZERO;
810    let mut compile_time = core::time::Duration::ZERO;
811    let mut validate_time = core::time::Duration::ZERO;
812
813    for (schema_uri, group) in &schema_groups {
814        let _group_span = tracing::debug_span!(
815            "schema_group",
816            schema = schema_uri.as_str(),
817            files = group.len(),
818        )
819        .entered();
820
821        // If ANY file in the group matches a `validate_formats = false` override,
822        // disable format validation for the whole group (they share one compiled validator).
823        let validate_formats = group.iter().all(|pf| {
824            config
825                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
826        });
827
828        // Remote schemas were prefetched in parallel above; local schemas are
829        // read from disk here (with in-memory caching).
830        let t = std::time::Instant::now();
831        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
832            schema_uri,
833            &prefetched,
834            &mut local_schema_cache,
835            group,
836            &mut errors,
837            &mut checked,
838            &mut on_check,
839        )
840        .await
841        else {
842            fetch_time += t.elapsed();
843            continue;
844        };
845        fetch_time += t.elapsed();
846
847        // Pre-compute schema hash once for the entire group.
848        let t = std::time::Instant::now();
849        let schema_hash = validation_cache::schema_hash(&schema_value);
850        hash_time += t.elapsed();
851
852        // Split the group into validation cache hits and misses.
853        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
854
855        let t = std::time::Instant::now();
856        for pf in group {
857            let (cached, vcache_status) = vcache
858                .lookup(&validation_cache::CacheKey {
859                    file_content: &pf.content,
860                    schema_hash: &schema_hash,
861                    validate_formats,
862                })
863                .await;
864
865            if let Some(cached_errors) = cached {
866                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
867                let cf = CheckedFile {
868                    path: pf.path.clone(),
869                    schema: schema_uri.clone(),
870                    cache_status,
871                    validation_cache_status: Some(vcache_status),
872                };
873                on_check(&cf);
874                checked.push(cf);
875            } else {
876                cache_misses.push(pf);
877            }
878        }
879        vcache_time += t.elapsed();
880
881        tracing::debug!(
882            cache_hits = group.len() - cache_misses.len(),
883            cache_misses = cache_misses.len(),
884            "validation cache"
885        );
886
887        // If all files hit the validation cache, skip schema compilation entirely.
888        if cache_misses.is_empty() {
889            continue;
890        }
891
892        // Compile the schema for cache misses.
893        let t = std::time::Instant::now();
894        let validator = {
895            match jsonschema::async_options()
896                .with_retriever(retriever.clone())
897                .should_validate_formats(validate_formats)
898                .build(&schema_value)
899                .await
900            {
901                Ok(v) => v,
902                Err(e) => {
903                    compile_time += t.elapsed();
904                    // When format validation is disabled and the compilation error
905                    // is a uri-reference issue (e.g. Rust-style $ref paths in
906                    // vector.json), skip validation silently.
907                    if !validate_formats && e.to_string().contains("uri-reference") {
908                        mark_group_checked(
909                            schema_uri,
910                            cache_status,
911                            Some(ValidationCacheStatus::Miss),
912                            &cache_misses,
913                            &mut checked,
914                            &mut on_check,
915                        );
916                        continue;
917                    }
918                    let msg = format!("failed to compile schema: {e}");
919                    report_group_error(
920                        |path| LintError::SchemaCompile {
921                            path: path.to_string(),
922                            message: msg.clone(),
923                        },
924                        schema_uri,
925                        cache_status,
926                        &cache_misses,
927                        &mut errors,
928                        &mut checked,
929                        &mut on_check,
930                    );
931                    continue;
932                }
933            }
934        };
935        compile_time += t.elapsed();
936
937        let t = std::time::Instant::now();
938        validate_group(
939            &validator,
940            schema_uri,
941            &schema_hash,
942            validate_formats,
943            cache_status,
944            &cache_misses,
945            &vcache,
946            &mut errors,
947            &mut checked,
948            &mut on_check,
949        )
950        .await;
951        validate_time += t.elapsed();
952    }
953
954    #[allow(clippy::cast_possible_truncation)]
955    {
956        tracing::info!(
957            fetch_ms = fetch_time.as_millis() as u64,
958            hash_ms = hash_time.as_millis() as u64,
959            vcache_ms = vcache_time.as_millis() as u64,
960            compile_ms = compile_time.as_millis() as u64,
961            validate_ms = validate_time.as_millis() as u64,
962            "phase2 breakdown"
963        );
964    }
965
966    // Sort errors for deterministic output (by path, then by span offset)
967    errors.sort_by(|a, b| {
968        a.path()
969            .cmp(b.path())
970            .then_with(|| a.offset().cmp(&b.offset()))
971    });
972
973    Ok(ValidateResult { errors, checked })
974}
975
976#[cfg(test)]
977mod tests {
978    use super::*;
979    use crate::retriever::SchemaCache;
980    use std::path::Path;
981
982    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
983        let cache = SchemaCache::memory();
984        for (uri, body) in entries {
985            cache.insert(
986                uri,
987                serde_json::from_str(body).expect("test mock: invalid JSON"),
988            );
989        }
990        cache
991    }
992
993    fn testdata() -> PathBuf {
994        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
995    }
996
997    /// Build glob patterns that scan one or more testdata directories for all supported file types.
998    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
999        dirs.iter()
1000            .flat_map(|dir| {
1001                let base = testdata().join(dir);
1002                vec![
1003                    base.join("*.json").to_string_lossy().to_string(),
1004                    base.join("*.yaml").to_string_lossy().to_string(),
1005                    base.join("*.yml").to_string_lossy().to_string(),
1006                    base.join("*.json5").to_string_lossy().to_string(),
1007                    base.join("*.jsonc").to_string_lossy().to_string(),
1008                    base.join("*.toml").to_string_lossy().to_string(),
1009                ]
1010            })
1011            .collect()
1012    }
1013
1014    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1015        ValidateArgs {
1016            globs: scenario_globs(dirs),
1017            exclude: vec![],
1018            cache_dir: None,
1019            force_schema_fetch: true,
1020            force_validation: true,
1021            no_catalog: true,
1022            config_dir: None,
1023            schema_cache_ttl: None,
1024        }
1025    }
1026
    /// JSON Schema shared by most tests: an object with a string `name`
    /// property, where `name` is required.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1029
1030    fn schema_mock() -> SchemaCache {
1031        mock(&[("https://example.com/schema.json", SCHEMA)])
1032    }
1033
1034    // --- Directory scanning tests ---
1035
1036    #[tokio::test]
1037    async fn no_matching_files() -> anyhow::Result<()> {
1038        let tmp = tempfile::tempdir()?;
1039        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1040        let c = ValidateArgs {
1041            globs: vec![pattern],
1042            exclude: vec![],
1043            cache_dir: None,
1044            force_schema_fetch: true,
1045            force_validation: true,
1046            no_catalog: true,
1047            config_dir: None,
1048            schema_cache_ttl: None,
1049        };
1050        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1051        assert!(!result.has_errors());
1052        Ok(())
1053    }
1054
1055    #[tokio::test]
1056    async fn dir_all_valid() -> anyhow::Result<()> {
1057        let c = args_for_dirs(&["positive_tests"]);
1058        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1059        assert!(!result.has_errors());
1060        Ok(())
1061    }
1062
1063    #[tokio::test]
1064    async fn dir_all_invalid() -> anyhow::Result<()> {
1065        let c = args_for_dirs(&["negative_tests"]);
1066        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1067        assert!(result.has_errors());
1068        Ok(())
1069    }
1070
1071    #[tokio::test]
1072    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1073        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1074        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1075        assert!(result.has_errors());
1076        Ok(())
1077    }
1078
1079    #[tokio::test]
1080    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1081        let c = args_for_dirs(&["no_schema"]);
1082        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1083        assert!(!result.has_errors());
1084        Ok(())
1085    }
1086
1087    #[tokio::test]
1088    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1089        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1090        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1091        assert!(!result.has_errors());
1092        Ok(())
1093    }
1094
1095    // --- Directory as positional arg ---
1096
1097    #[tokio::test]
1098    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1099        let dir = testdata().join("positive_tests");
1100        let c = ValidateArgs {
1101            globs: vec![dir.to_string_lossy().to_string()],
1102            exclude: vec![],
1103            cache_dir: None,
1104            force_schema_fetch: true,
1105            force_validation: true,
1106            no_catalog: true,
1107            config_dir: None,
1108            schema_cache_ttl: None,
1109        };
1110        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1111        assert!(!result.has_errors());
1112        assert!(result.files_checked() > 0);
1113        Ok(())
1114    }
1115
1116    #[tokio::test]
1117    async fn multiple_directory_args() -> anyhow::Result<()> {
1118        let pos_dir = testdata().join("positive_tests");
1119        let no_schema_dir = testdata().join("no_schema");
1120        let c = ValidateArgs {
1121            globs: vec![
1122                pos_dir.to_string_lossy().to_string(),
1123                no_schema_dir.to_string_lossy().to_string(),
1124            ],
1125            exclude: vec![],
1126            cache_dir: None,
1127            force_schema_fetch: true,
1128            force_validation: true,
1129            no_catalog: true,
1130            config_dir: None,
1131            schema_cache_ttl: None,
1132        };
1133        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1134        assert!(!result.has_errors());
1135        Ok(())
1136    }
1137
1138    #[tokio::test]
1139    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1140        let dir = testdata().join("positive_tests");
1141        let glob_pattern = testdata()
1142            .join("no_schema")
1143            .join("*.json")
1144            .to_string_lossy()
1145            .to_string();
1146        let c = ValidateArgs {
1147            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1148            exclude: vec![],
1149            cache_dir: None,
1150            force_schema_fetch: true,
1151            force_validation: true,
1152            no_catalog: true,
1153            config_dir: None,
1154            schema_cache_ttl: None,
1155        };
1156        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1157        assert!(!result.has_errors());
1158        Ok(())
1159    }
1160
1161    #[tokio::test]
1162    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1163        let base = testdata().join("malformed");
1164        let c = ValidateArgs {
1165            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1166            exclude: vec![],
1167            cache_dir: None,
1168            force_schema_fetch: true,
1169            force_validation: true,
1170            no_catalog: true,
1171            config_dir: None,
1172            schema_cache_ttl: None,
1173        };
1174        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1175        assert!(result.has_errors());
1176        Ok(())
1177    }
1178
1179    #[tokio::test]
1180    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1181        let base = testdata().join("malformed");
1182        let c = ValidateArgs {
1183            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1184            exclude: vec![],
1185            cache_dir: None,
1186            force_schema_fetch: true,
1187            force_validation: true,
1188            no_catalog: true,
1189            config_dir: None,
1190            schema_cache_ttl: None,
1191        };
1192        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1193        assert!(result.has_errors());
1194        Ok(())
1195    }
1196
1197    // --- Exclude filter ---
1198
1199    #[tokio::test]
1200    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1201        let base = testdata().join("negative_tests");
1202        let c = ValidateArgs {
1203            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1204            exclude: vec![
1205                base.join("missing_name.json").to_string_lossy().to_string(),
1206                base.join("missing_name.toml").to_string_lossy().to_string(),
1207                base.join("missing_name.yaml").to_string_lossy().to_string(),
1208            ],
1209            cache_dir: None,
1210            force_schema_fetch: true,
1211            force_validation: true,
1212            no_catalog: true,
1213            config_dir: None,
1214            schema_cache_ttl: None,
1215        };
1216        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1217        assert!(!result.has_errors());
1218        Ok(())
1219    }
1220
1221    // --- Cache options ---
1222
1223    #[tokio::test]
1224    async fn custom_cache_dir() -> anyhow::Result<()> {
1225        let c = ValidateArgs {
1226            globs: scenario_globs(&["positive_tests"]),
1227            exclude: vec![],
1228            cache_dir: None,
1229            force_schema_fetch: true,
1230            force_validation: true,
1231            no_catalog: true,
1232            config_dir: None,
1233            schema_cache_ttl: None,
1234        };
1235        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1236        assert!(!result.has_errors());
1237        Ok(())
1238    }
1239
1240    // --- Local schema ---
1241
1242    #[tokio::test]
1243    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1244        let tmp = tempfile::tempdir()?;
1245        let schema_path = tmp.path().join("schema.json");
1246        fs::write(&schema_path, SCHEMA)?;
1247
1248        let f = tmp.path().join("valid.json");
1249        fs::write(
1250            &f,
1251            format!(
1252                r#"{{"$schema":"{}","name":"hello"}}"#,
1253                schema_path.to_string_lossy()
1254            ),
1255        )?;
1256
1257        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1258        let c = ValidateArgs {
1259            globs: vec![pattern],
1260            exclude: vec![],
1261            cache_dir: None,
1262            force_schema_fetch: true,
1263            force_validation: true,
1264            no_catalog: true,
1265            config_dir: None,
1266            schema_cache_ttl: None,
1267        };
1268        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1269        assert!(!result.has_errors());
1270        Ok(())
1271    }
1272
1273    #[tokio::test]
1274    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1275        let tmp = tempfile::tempdir()?;
1276        let schema_path = tmp.path().join("schema.json");
1277        fs::write(&schema_path, SCHEMA)?;
1278
1279        let f = tmp.path().join("valid.yaml");
1280        fs::write(
1281            &f,
1282            format!(
1283                "# yaml-language-server: $schema={}\nname: hello\n",
1284                schema_path.to_string_lossy()
1285            ),
1286        )?;
1287
1288        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1289        let c = ValidateArgs {
1290            globs: vec![pattern],
1291            exclude: vec![],
1292            cache_dir: None,
1293            force_schema_fetch: true,
1294            force_validation: true,
1295            no_catalog: true,
1296            config_dir: None,
1297            schema_cache_ttl: None,
1298        };
1299        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1300        assert!(!result.has_errors());
1301        Ok(())
1302    }
1303
1304    #[tokio::test]
1305    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1306        let tmp = tempfile::tempdir()?;
1307        let f = tmp.path().join("ref.json");
1308        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1309
1310        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1311        let c = ValidateArgs {
1312            globs: vec![pattern],
1313            exclude: vec![],
1314            cache_dir: None,
1315            force_schema_fetch: true,
1316            force_validation: true,
1317            no_catalog: true,
1318            config_dir: None,
1319            schema_cache_ttl: None,
1320        };
1321        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1322        assert!(result.has_errors());
1323        Ok(())
1324    }
1325
1326    // --- JSON5 / JSONC tests ---
1327
1328    #[tokio::test]
1329    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1330        let tmp = tempfile::tempdir()?;
1331        let schema_path = tmp.path().join("schema.json");
1332        fs::write(&schema_path, SCHEMA)?;
1333
1334        let f = tmp.path().join("config.json5");
1335        fs::write(
1336            &f,
1337            format!(
1338                r#"{{
1339  // JSON5 comment
1340  "$schema": "{}",
1341  name: "hello",
1342}}"#,
1343                schema_path.to_string_lossy()
1344            ),
1345        )?;
1346
1347        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1348        let c = ValidateArgs {
1349            globs: vec![pattern],
1350            exclude: vec![],
1351            cache_dir: None,
1352            force_schema_fetch: true,
1353            force_validation: true,
1354            no_catalog: true,
1355            config_dir: None,
1356            schema_cache_ttl: None,
1357        };
1358        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1359        assert!(!result.has_errors());
1360        Ok(())
1361    }
1362
1363    #[tokio::test]
1364    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1365        let tmp = tempfile::tempdir()?;
1366        let schema_path = tmp.path().join("schema.json");
1367        fs::write(&schema_path, SCHEMA)?;
1368
1369        let f = tmp.path().join("config.jsonc");
1370        fs::write(
1371            &f,
1372            format!(
1373                r#"{{
1374  /* JSONC comment */
1375  "$schema": "{}",
1376  "name": "hello"
1377}}"#,
1378                schema_path.to_string_lossy()
1379            ),
1380        )?;
1381
1382        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1383        let c = ValidateArgs {
1384            globs: vec![pattern],
1385            exclude: vec![],
1386            cache_dir: None,
1387            force_schema_fetch: true,
1388            force_validation: true,
1389            no_catalog: true,
1390            config_dir: None,
1391            schema_cache_ttl: None,
1392        };
1393        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1394        assert!(!result.has_errors());
1395        Ok(())
1396    }
1397
1398    // --- Catalog-based schema matching ---
1399
    /// Minimal stand-in for the GitHub workflow schema used by the catalog
    /// tests: an object that requires `on` and `jobs`, with `jobs` being an
    /// object and the optional `name` a string.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1409
1410    fn gh_catalog_json() -> String {
1411        r#"{"schemas":[{
1412            "name": "GitHub Workflow",
1413            "url": "https://www.schemastore.org/github-workflow.json",
1414            "fileMatch": [
1415                "**/.github/workflows/*.yml",
1416                "**/.github/workflows/*.yaml"
1417            ]
1418        }]}"#
1419            .to_string()
1420    }
1421
1422    #[tokio::test]
1423    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1424        let tmp = tempfile::tempdir()?;
1425        let cache_tmp = tempfile::tempdir()?;
1426        let wf_dir = tmp.path().join(".github/workflows");
1427        fs::create_dir_all(&wf_dir)?;
1428        fs::write(
1429            wf_dir.join("ci.yml"),
1430            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1431        )?;
1432
1433        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1434        let client = mock(&[
1435            (
1436                "https://www.schemastore.org/api/json/catalog.json",
1437                &gh_catalog_json(),
1438            ),
1439            (
1440                "https://www.schemastore.org/github-workflow.json",
1441                GH_WORKFLOW_SCHEMA,
1442            ),
1443        ]);
1444        let c = ValidateArgs {
1445            globs: vec![pattern],
1446            exclude: vec![],
1447            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1448            force_schema_fetch: true,
1449            force_validation: true,
1450            no_catalog: false,
1451            config_dir: None,
1452            schema_cache_ttl: None,
1453        };
1454        let result = run_with(&c, Some(client), |_| {}).await?;
1455        assert!(!result.has_errors());
1456        Ok(())
1457    }
1458
1459    #[tokio::test]
1460    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1461        let tmp = tempfile::tempdir()?;
1462        let cache_tmp = tempfile::tempdir()?;
1463        let wf_dir = tmp.path().join(".github/workflows");
1464        fs::create_dir_all(&wf_dir)?;
1465        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1466
1467        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1468        let client = mock(&[
1469            (
1470                "https://www.schemastore.org/api/json/catalog.json",
1471                &gh_catalog_json(),
1472            ),
1473            (
1474                "https://www.schemastore.org/github-workflow.json",
1475                GH_WORKFLOW_SCHEMA,
1476            ),
1477        ]);
1478        let c = ValidateArgs {
1479            globs: vec![pattern],
1480            exclude: vec![],
1481            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1482            force_schema_fetch: true,
1483            force_validation: true,
1484            no_catalog: false,
1485            config_dir: None,
1486            schema_cache_ttl: None,
1487        };
1488        let result = run_with(&c, Some(client), |_| {}).await?;
1489        assert!(result.has_errors());
1490        Ok(())
1491    }
1492
1493    #[tokio::test]
1494    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1495        let tmp = tempfile::tempdir()?;
1496        let cache_tmp = tempfile::tempdir()?;
1497        let wf_dir = tmp.path().join(".github/workflows");
1498        fs::create_dir_all(&wf_dir)?;
1499        fs::write(
1500            wf_dir.join("ci.yml"),
1501            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1502        )?;
1503
1504        let client = mock(&[
1505            (
1506                "https://www.schemastore.org/api/json/catalog.json",
1507                &gh_catalog_json(),
1508            ),
1509            (
1510                "https://www.schemastore.org/github-workflow.json",
1511                GH_WORKFLOW_SCHEMA,
1512            ),
1513        ]);
1514        let c = ValidateArgs {
1515            globs: vec![],
1516            exclude: vec![],
1517            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1518            force_schema_fetch: true,
1519            force_validation: true,
1520            no_catalog: false,
1521            config_dir: None,
1522            schema_cache_ttl: None,
1523        };
1524
1525        let orig_dir = std::env::current_dir()?;
1526        std::env::set_current_dir(tmp.path())?;
1527        let result = run_with(&c, Some(client), |_| {}).await?;
1528        std::env::set_current_dir(orig_dir)?;
1529
1530        assert!(!result.has_errors());
1531        Ok(())
1532    }
1533
1534    // --- TOML tests ---
1535
1536    #[tokio::test]
1537    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1538        let tmp = tempfile::tempdir()?;
1539        let schema_path = tmp.path().join("schema.json");
1540        fs::write(&schema_path, SCHEMA)?;
1541
1542        let f = tmp.path().join("config.toml");
1543        fs::write(
1544            &f,
1545            format!(
1546                "# :schema {}\nname = \"hello\"\n",
1547                schema_path.to_string_lossy()
1548            ),
1549        )?;
1550
1551        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1552        let c = ValidateArgs {
1553            globs: vec![pattern],
1554            exclude: vec![],
1555            cache_dir: None,
1556            force_schema_fetch: true,
1557            force_validation: true,
1558            no_catalog: true,
1559            config_dir: None,
1560            schema_cache_ttl: None,
1561        };
1562        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1563        assert!(!result.has_errors());
1564        Ok(())
1565    }
1566
1567    // --- Rewrite rules + // resolution ---
1568
1569    #[tokio::test]
1570    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1571        let tmp = tempfile::tempdir()?;
1572
1573        let schemas_dir = tmp.path().join("schemas");
1574        fs::create_dir_all(&schemas_dir)?;
1575        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1576
1577        fs::write(
1578            tmp.path().join("lintel.toml"),
1579            r#"
1580[rewrite]
1581"http://localhost:9000/" = "//schemas/"
1582"#,
1583        )?;
1584
1585        let f = tmp.path().join("config.json");
1586        fs::write(
1587            &f,
1588            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1589        )?;
1590
1591        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1592        let c = ValidateArgs {
1593            globs: vec![pattern],
1594            exclude: vec![],
1595            cache_dir: None,
1596            force_schema_fetch: true,
1597            force_validation: true,
1598            no_catalog: true,
1599            config_dir: Some(tmp.path().to_path_buf()),
1600            schema_cache_ttl: None,
1601        };
1602
1603        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1604        assert!(!result.has_errors());
1605        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1606        Ok(())
1607    }
1608
1609    #[tokio::test]
1610    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1611        let tmp = tempfile::tempdir()?;
1612
1613        let schemas_dir = tmp.path().join("schemas");
1614        fs::create_dir_all(&schemas_dir)?;
1615        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1616
1617        fs::write(tmp.path().join("lintel.toml"), "")?;
1618
1619        let sub = tmp.path().join("deeply/nested");
1620        fs::create_dir_all(&sub)?;
1621        let f = sub.join("config.json");
1622        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1623
1624        let pattern = sub.join("*.json").to_string_lossy().to_string();
1625        let c = ValidateArgs {
1626            globs: vec![pattern],
1627            exclude: vec![],
1628            cache_dir: None,
1629            force_schema_fetch: true,
1630            force_validation: true,
1631            no_catalog: true,
1632            config_dir: Some(tmp.path().to_path_buf()),
1633            schema_cache_ttl: None,
1634        };
1635
1636        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1637        assert!(!result.has_errors());
1638        Ok(())
1639    }
1640
1641    // --- Format validation override ---
1642
    /// Schema for the format-validation tests: an object whose optional `link`
    /// property is a string with `format: uri-reference`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1649
1650    #[tokio::test]
1651    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1652        let tmp = tempfile::tempdir()?;
1653        let schema_path = tmp.path().join("schema.json");
1654        fs::write(&schema_path, FORMAT_SCHEMA)?;
1655
1656        let f = tmp.path().join("data.json");
1657        fs::write(
1658            &f,
1659            format!(
1660                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1661                schema_path.to_string_lossy()
1662            ),
1663        )?;
1664
1665        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1666        let c = ValidateArgs {
1667            globs: vec![pattern],
1668            exclude: vec![],
1669            cache_dir: None,
1670            force_schema_fetch: true,
1671            force_validation: true,
1672            no_catalog: true,
1673            config_dir: Some(tmp.path().to_path_buf()),
1674            schema_cache_ttl: None,
1675        };
1676        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1677        assert!(
1678            result.has_errors(),
1679            "expected format error without override"
1680        );
1681        Ok(())
1682    }
1683
1684    #[tokio::test]
1685    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1686        let tmp = tempfile::tempdir()?;
1687        let schema_path = tmp.path().join("schema.json");
1688        fs::write(&schema_path, FORMAT_SCHEMA)?;
1689
1690        let f = tmp.path().join("data.json");
1691        fs::write(
1692            &f,
1693            format!(
1694                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1695                schema_path.to_string_lossy()
1696            ),
1697        )?;
1698
1699        // Use **/data.json to match the absolute path from the tempdir.
1700        fs::write(
1701            tmp.path().join("lintel.toml"),
1702            r#"
1703[[override]]
1704files = ["**/data.json"]
1705validate_formats = false
1706"#,
1707        )?;
1708
1709        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1710        let c = ValidateArgs {
1711            globs: vec![pattern],
1712            exclude: vec![],
1713            cache_dir: None,
1714            force_schema_fetch: true,
1715            force_validation: true,
1716            no_catalog: true,
1717            config_dir: Some(tmp.path().to_path_buf()),
1718            schema_cache_ttl: None,
1719        };
1720        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1721        assert!(
1722            !result.has_errors(),
1723            "expected no errors with validate_formats = false override"
1724        );
1725        Ok(())
1726    }
1727
1728    // --- Unrecognized extension handling ---
1729
1730    #[tokio::test]
1731    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1732        let tmp = tempfile::tempdir()?;
1733        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1734
1735        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1736        let c = ValidateArgs {
1737            globs: vec![pattern],
1738            exclude: vec![],
1739            cache_dir: None,
1740            force_schema_fetch: true,
1741            force_validation: true,
1742            no_catalog: true,
1743            config_dir: Some(tmp.path().to_path_buf()),
1744            schema_cache_ttl: None,
1745        };
1746        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1747        assert!(!result.has_errors());
1748        assert_eq!(result.files_checked(), 0);
1749        Ok(())
1750    }
1751
1752    #[tokio::test]
1753    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1754        let tmp = tempfile::tempdir()?;
1755        let cache_tmp = tempfile::tempdir()?;
1756        // File has .cfg extension (unrecognized) but content is valid JSON
1757        fs::write(
1758            tmp.path().join("myapp.cfg"),
1759            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1760        )?;
1761
1762        let catalog_json = r#"{"schemas":[{
1763            "name": "MyApp Config",
1764            "url": "https://example.com/myapp.schema.json",
1765            "fileMatch": ["*.cfg"]
1766        }]}"#;
1767        let schema =
1768            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1769
1770        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1771        let client = mock(&[
1772            (
1773                "https://www.schemastore.org/api/json/catalog.json",
1774                catalog_json,
1775            ),
1776            ("https://example.com/myapp.schema.json", schema),
1777        ]);
1778        let c = ValidateArgs {
1779            globs: vec![pattern],
1780            exclude: vec![],
1781            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1782            force_schema_fetch: true,
1783            force_validation: true,
1784            no_catalog: false,
1785            config_dir: Some(tmp.path().to_path_buf()),
1786            schema_cache_ttl: None,
1787        };
1788        let result = run_with(&c, Some(client), |_| {}).await?;
1789        assert!(!result.has_errors());
1790        assert_eq!(result.files_checked(), 1);
1791        Ok(())
1792    }
1793
1794    #[tokio::test]
1795    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1796        let tmp = tempfile::tempdir()?;
1797        let cache_tmp = tempfile::tempdir()?;
1798        // File matches catalog but content isn't parseable by any format
1799        fs::write(
1800            tmp.path().join("myapp.cfg"),
1801            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1802        )?;
1803
1804        let catalog_json = r#"{"schemas":[{
1805            "name": "MyApp Config",
1806            "url": "https://example.com/myapp.schema.json",
1807            "fileMatch": ["*.cfg"]
1808        }]}"#;
1809
1810        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1811        let client = mock(&[(
1812            "https://www.schemastore.org/api/json/catalog.json",
1813            catalog_json,
1814        )]);
1815        let c = ValidateArgs {
1816            globs: vec![pattern],
1817            exclude: vec![],
1818            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1819            force_schema_fetch: true,
1820            force_validation: true,
1821            no_catalog: false,
1822            config_dir: Some(tmp.path().to_path_buf()),
1823            schema_cache_ttl: None,
1824        };
1825        let result = run_with(&c, Some(client), |_| {}).await?;
1826        assert!(!result.has_errors());
1827        assert_eq!(result.files_checked(), 0);
1828        Ok(())
1829    }
1830
1831    #[tokio::test]
1832    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1833        let tmp = tempfile::tempdir()?;
1834        let cache_tmp = tempfile::tempdir()?;
1835        // File has .cfg extension, content is valid JSON but fails schema validation
1836        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1837
1838        let catalog_json = r#"{"schemas":[{
1839            "name": "MyApp Config",
1840            "url": "https://example.com/myapp.schema.json",
1841            "fileMatch": ["*.cfg"]
1842        }]}"#;
1843        let schema =
1844            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1845
1846        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1847        let client = mock(&[
1848            (
1849                "https://www.schemastore.org/api/json/catalog.json",
1850                catalog_json,
1851            ),
1852            ("https://example.com/myapp.schema.json", schema),
1853        ]);
1854        let c = ValidateArgs {
1855            globs: vec![pattern],
1856            exclude: vec![],
1857            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1858            force_schema_fetch: true,
1859            force_validation: true,
1860            no_catalog: false,
1861            config_dir: Some(tmp.path().to_path_buf()),
1862            schema_cache_ttl: None,
1863        };
1864        let result = run_with(&c, Some(client), |_| {}).await?;
1865        assert!(result.has_errors());
1866        assert_eq!(result.files_checked(), 1);
1867        Ok(())
1868    }
1869
1870    // --- Validation cache ---
1871
1872    #[tokio::test]
1873    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1874        let tmp = tempfile::tempdir()?;
1875        let schema_path = tmp.path().join("schema.json");
1876        fs::write(&schema_path, SCHEMA)?;
1877
1878        let f = tmp.path().join("valid.json");
1879        fs::write(
1880            &f,
1881            format!(
1882                r#"{{"$schema":"{}","name":"hello"}}"#,
1883                schema_path.to_string_lossy()
1884            ),
1885        )?;
1886
1887        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1888
1889        // First run: force_validation = false so results get cached
1890        let c = ValidateArgs {
1891            globs: vec![pattern.clone()],
1892            exclude: vec![],
1893            cache_dir: None,
1894            force_schema_fetch: true,
1895            force_validation: false,
1896            no_catalog: true,
1897            config_dir: None,
1898            schema_cache_ttl: None,
1899        };
1900        let mut first_statuses = Vec::new();
1901        let result = run_with(&c, Some(mock(&[])), |cf| {
1902            first_statuses.push(cf.validation_cache_status);
1903        })
1904        .await?;
1905        assert!(!result.has_errors());
1906        assert!(result.files_checked() > 0);
1907
1908        // Verify the first run recorded a validation cache miss
1909        assert!(
1910            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
1911            "expected at least one validation cache miss on first run"
1912        );
1913
1914        // Second run: same file, same schema — should hit validation cache
1915        let mut second_statuses = Vec::new();
1916        let result = run_with(&c, Some(mock(&[])), |cf| {
1917            second_statuses.push(cf.validation_cache_status);
1918        })
1919        .await?;
1920        assert!(!result.has_errors());
1921
1922        // Verify the second run got a validation cache hit
1923        assert!(
1924            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
1925            "expected at least one validation cache hit on second run"
1926        );
1927        Ok(())
1928    }
1929
1930    // --- clean_error_message ---
1931
1932    #[test]
1933    fn clean_strips_anyof_value() {
1934        let msg =
1935            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
1936        assert_eq!(
1937            clean_error_message(msg.to_string()),
1938            "not valid under any of the schemas listed in the 'anyOf' keyword"
1939        );
1940    }
1941
1942    #[test]
1943    fn clean_strips_oneof_value() {
1944        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
1945        assert_eq!(
1946            clean_error_message(msg.to_string()),
1947            "not valid under any of the schemas listed in the 'oneOf' keyword"
1948        );
1949    }
1950
1951    #[test]
1952    fn clean_strips_long_value() {
1953        let long_value = "x".repeat(5000);
1954        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
1955        let msg = format!("{long_value}{suffix}");
1956        assert_eq!(
1957            clean_error_message(msg),
1958            "not valid under any of the schemas listed in the 'anyOf' keyword"
1959        );
1960    }
1961
1962    #[test]
1963    fn clean_preserves_type_error() {
1964        let msg = r#"12345 is not of types "null", "string""#;
1965        assert_eq!(clean_error_message(msg.to_string()), msg);
1966    }
1967
1968    #[test]
1969    fn clean_preserves_required_property() {
1970        let msg = "\"name\" is a required property";
1971        assert_eq!(clean_error_message(msg.to_string()), msg);
1972    }
1973}