Skip to main content

lintel_validate/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog;
11use lintel_schema_cache::{CacheStatus, SchemaCache};
12use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
13use schema_catalog::{CompiledCatalog, FileFormat};
14
15use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
16use crate::discover;
17use crate::parsers::{self, Parser};
18use crate::registry;
19
/// Upper bound on the number of files that are read from disk at once.
///
/// The cap exists so that large directories do not exhaust file descriptors.
/// 128 sits well under the default soft limit on macOS (256) and Linux (1024)
/// while still giving good throughput.
const FD_CONCURRENCY_LIMIT: usize = 128;
24
25/// Composite retriever that dispatches `file://` URIs to local disk reads
26/// and everything else to the HTTP-backed [`SchemaCache`].
27struct LocalRetriever {
28    http: SchemaCache,
29}
30
31#[async_trait::async_trait]
32impl jsonschema::AsyncRetrieve for LocalRetriever {
33    async fn retrieve(
34        &self,
35        uri: &jsonschema::Uri<String>,
36    ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
37        let s = uri.as_str();
38        if let Some(raw) = s.strip_prefix("file://") {
39            let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
40            let content = tokio::fs::read_to_string(path.as_ref()).await?;
41            Ok(serde_json::from_str(&content)?)
42        } else {
43            self.http.retrieve(uri).await
44        }
45    }
46}
47
/// Inputs for a validation run (see [`run`] and [`run_with`]).
pub struct ValidateArgs {
    /// Glob patterns (or directories) selecting the files to validate.
    /// When empty, files are auto-discovered from the current directory.
    pub globs: Vec<String>,

    /// Glob patterns for files that must be left out (repeatable).
    pub exclude: Vec<String>,

    /// Directory in which fetched remote schemas are cached.
    pub cache_dir: Option<String>,

    /// Skip reading schemas from the cache; freshly fetched schemas are
    /// still written back to it.
    pub force_schema_fetch: bool,

    /// Skip reading validation results from the cache; new results are
    /// still written back to it.
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching.
    pub no_catalog: bool,

    /// Directory from which the search for `lintel.toml` starts
    /// (the current working directory when `None`).
    pub config_dir: Option<PathBuf>,

    /// Time-to-live for cached schemas. `None` means they never expire.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
73
74/// Re-exported from [`crate::diagnostics::LintError`] so callers can use
75/// `lintel_validate::validate::LintError` without importing diagnostics.
76pub use crate::diagnostics::LintError;
77
78/// A file that was checked and the schema it resolved to.
79pub struct CheckedFile {
80    pub path: String,
81    pub schema: String,
82    /// `None` for local schemas and builtins; `Some` for remote schemas.
83    pub cache_status: Option<CacheStatus>,
84    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
85    pub validation_cache_status: Option<ValidationCacheStatus>,
86}
87
88/// Result of a validation run.
89pub struct ValidateResult {
90    pub errors: Vec<LintError>,
91    pub checked: Vec<CheckedFile>,
92}
93
94impl ValidateResult {
95    pub fn has_errors(&self) -> bool {
96        !self.errors.is_empty()
97    }
98
99    pub fn files_checked(&self) -> usize {
100        self.checked.len()
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Internal types
106// ---------------------------------------------------------------------------
107
108/// A file that has been parsed and matched to a schema URI.
109struct ParsedFile {
110    path: String,
111    content: String,
112    instance: Value,
113    /// Original schema URI before rewrites (for override matching).
114    original_schema_uri: String,
115}
116
117// ---------------------------------------------------------------------------
118// Config loading
119// ---------------------------------------------------------------------------
120
121/// Locate `lintel.toml`, load the full config, and return the config directory.
122/// Returns `(config, config_dir, config_path)`.  When no config is found or
123/// cwd is unavailable the config is default and `config_path` is `None`.
124#[tracing::instrument(skip_all)]
125pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
126    let start_dir = match search_dir {
127        Some(d) => d.to_path_buf(),
128        None => match std::env::current_dir() {
129            Ok(d) => d,
130            Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
131        },
132    };
133
134    let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
135        return (lintel_config::Config::default(), start_dir, None);
136    };
137
138    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
139    let cfg = lintel_config::find_and_load(&start_dir)
140        .ok()
141        .flatten()
142        .unwrap_or_default();
143    (cfg, dir, Some(config_path))
144}
145
146// ---------------------------------------------------------------------------
147// File collection
148// ---------------------------------------------------------------------------
149
150/// Collect input files from globs/directories, applying exclude filters.
151///
152/// # Errors
153///
154/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
155#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
156pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
157    if globs.is_empty() {
158        return discover::discover_files(".", exclude);
159    }
160
161    let mut result = Vec::new();
162    for pattern in globs {
163        let path = Path::new(pattern);
164        if path.is_dir() {
165            result.extend(discover::discover_files(pattern, exclude)?);
166        } else {
167            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
168                let path = entry?;
169                if path.is_file() && !is_excluded(&path, exclude) {
170                    result.push(path);
171                }
172            }
173        }
174    }
175    Ok(result)
176}
177
178fn is_excluded(path: &Path, excludes: &[String]) -> bool {
179    let path_str = match path.to_str() {
180        Some(s) => s.strip_prefix("./").unwrap_or(s),
181        None => return false,
182    };
183    excludes
184        .iter()
185        .any(|pattern| glob_match::glob_match(pattern, path_str))
186}
187
188// ---------------------------------------------------------------------------
189// Phase 1: Parse files and resolve schema URIs
190// ---------------------------------------------------------------------------
191
192/// Try parsing content with each known format, returning the first success.
193///
194/// JSONC is tried first (superset of JSON, handles comments), then YAML and
195/// TOML which cover the most common config formats, followed by the rest.
196pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
197    use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
198    const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
199
200    for fmt in FORMATS {
201        let parser = parsers::parser_for(fmt);
202        if let Ok(val) = parser.parse(content, file_name) {
203            return Some((fmt, val));
204        }
205    }
206    None
207}
208
209/// Result of processing a single file: either a parsed file with its schema URI,
210/// a lint error, or nothing (file was skipped).
211enum FileResult {
212    Parsed {
213        schema_uri: String,
214        parsed: ParsedFile,
215    },
216    Error(LintError),
217    Skip,
218}
219
/// Turn a schema reference into the string used to load it.
///
/// `http://` and `https://` URIs pass through untouched. Anything else is
/// treated as a local path and joined onto `base_dir` when one is given
/// (the file's parent directory for inline `$schema`, the config directory
/// for config/catalog sources). With no base directory the input is
/// returned as-is.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    let is_remote = ["http://", "https://"]
        .iter()
        .any(|scheme| schema_uri.starts_with(*scheme));

    match base_dir {
        Some(dir) if !is_remote => dir.join(schema_uri).to_string_lossy().into_owned(),
        _ => schema_uri.to_owned(),
    }
}
235
236/// Process a single file's already-read content: parse and resolve schema URI.
237///
238/// Returns a `Vec` because JSONL files expand to one result per non-empty line.
239#[allow(clippy::too_many_arguments)]
240fn process_one_file(
241    path: &Path,
242    content: String,
243    config: &lintel_config::Config,
244    config_dir: &Path,
245    compiled_catalogs: &[CompiledCatalog],
246) -> Vec<FileResult> {
247    let path_str = path.display().to_string();
248    let file_name = path
249        .file_name()
250        .and_then(|n| n.to_str())
251        .unwrap_or(&path_str);
252
253    let detected_format = parsers::detect_format(path);
254
255    // JSONL files get special per-line handling.
256    if detected_format == Some(FileFormat::Jsonl) {
257        return process_jsonl_file(
258            path,
259            &path_str,
260            file_name,
261            &content,
262            config,
263            config_dir,
264            compiled_catalogs,
265        );
266    }
267
268    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
269    if detected_format.is_none() {
270        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
271            || compiled_catalogs
272                .iter()
273                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
274        if !has_match {
275            return vec![FileResult::Skip];
276        }
277    }
278
279    // Parse the file content.
280    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
281        let parser = parsers::parser_for(fmt);
282        match parser.parse(&content, &path_str) {
283            Ok(val) => (parser, val),
284            Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
285        }
286    } else {
287        match try_parse_all(&content, &path_str) {
288            Some((fmt, val)) => (parsers::parser_for(fmt), val),
289            None => return vec![FileResult::Skip],
290        }
291    };
292
293    // Skip markdown files with no frontmatter
294    if instance.is_null() {
295        return vec![FileResult::Skip];
296    }
297
298    // Schema resolution priority:
299    // 1. Inline $schema / YAML modeline (always wins)
300    // 2. Custom schema mappings from lintel.toml [schemas]
301    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
302    //
303    // Track whether the URI came from inline $schema (resolve relative to file)
304    // or from config/catalog (resolve relative to config dir).
305    let inline_uri = parser.extract_schema_uri(&content, &instance);
306    let from_inline = inline_uri.is_some();
307    let schema_uri = inline_uri
308        .or_else(|| {
309            config
310                .find_schema_mapping(&path_str, file_name)
311                .map(str::to_string)
312        })
313        .or_else(|| {
314            compiled_catalogs
315                .iter()
316                .find_map(|cat| cat.find_schema(&path_str, file_name))
317                .map(str::to_string)
318        });
319
320    let Some(schema_uri) = schema_uri else {
321        return vec![FileResult::Skip];
322    };
323
324    // Keep original URI for override matching (before rewrites)
325    let original_schema_uri = schema_uri.clone();
326
327    // Apply rewrite rules, then resolve // paths relative to lintel.toml
328    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
329    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
330
331    // Resolve relative local paths:
332    // - Inline $schema: relative to the file's parent directory
333    // - Config/catalog: relative to the config directory (where lintel.toml lives)
334    let schema_uri = resolve_local_schema_path(
335        &schema_uri,
336        if from_inline {
337            path.parent()
338        } else {
339            Some(config_dir)
340        },
341    );
342
343    vec![FileResult::Parsed {
344        schema_uri,
345        parsed: ParsedFile {
346            path: path_str,
347            content,
348            instance,
349            original_schema_uri,
350        },
351    }]
352}
353
354/// Process a JSONL file: parse each line independently and resolve schemas.
355///
356/// Each non-empty line becomes its own [`FileResult::Parsed`]. Schema resolution
357/// priority per line: inline `$schema` on the line > config mapping > catalog.
358///
359/// Also checks schema consistency across lines — mismatches are emitted as
360/// [`FileResult::Error`] so they flow through the normal Reporter pipeline.
361#[allow(clippy::too_many_arguments)]
362fn process_jsonl_file(
363    path: &Path,
364    path_str: &str,
365    file_name: &str,
366    content: &str,
367    config: &lintel_config::Config,
368    config_dir: &Path,
369    compiled_catalogs: &[CompiledCatalog],
370) -> Vec<FileResult> {
371    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
372        Ok(lines) => lines,
373        Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
374    };
375
376    if lines.is_empty() {
377        return vec![FileResult::Skip];
378    }
379
380    let mut results = Vec::with_capacity(lines.len());
381
382    // Check schema consistency before consuming lines.
383    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
384        for m in mismatches {
385            results.push(FileResult::Error(LintError::SchemaMismatch {
386                path: path_str.to_string(),
387                line_number: m.line_number,
388                message: format!("expected consistent $schema but found {}", m.schema_uri),
389            }));
390        }
391    }
392
393    for line in lines {
394        // Schema resolution: inline $schema on line > config > catalog
395        // Track source to resolve relative paths correctly.
396        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
397        let from_inline = inline_uri.is_some();
398        let schema_uri = inline_uri
399            .or_else(|| {
400                config
401                    .find_schema_mapping(path_str, file_name)
402                    .map(str::to_string)
403            })
404            .or_else(|| {
405                compiled_catalogs
406                    .iter()
407                    .find_map(|cat| cat.find_schema(path_str, file_name))
408                    .map(str::to_string)
409            });
410
411        let Some(schema_uri) = schema_uri else {
412            continue;
413        };
414
415        let original_schema_uri = schema_uri.clone();
416
417        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
418        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
419
420        // Inline $schema: relative to file's parent. Config/catalog: relative to config dir.
421        let schema_uri = resolve_local_schema_path(
422            &schema_uri,
423            if from_inline {
424                path.parent()
425            } else {
426                Some(config_dir)
427            },
428        );
429
430        let line_path = format!("{path_str}:{}", line.line_number);
431
432        results.push(FileResult::Parsed {
433            schema_uri,
434            parsed: ParsedFile {
435                path: line_path,
436                content: line.raw,
437                instance: line.value,
438                original_schema_uri,
439            },
440        });
441    }
442
443    if results.is_empty() {
444        vec![FileResult::Skip]
445    } else {
446        results
447    }
448}
449
450/// Read each file concurrently with tokio, parse its content, extract its
451/// schema URI, apply rewrites, and group by resolved schema URI.
452#[tracing::instrument(skip_all, fields(file_count = files.len()))]
453#[allow(clippy::too_many_arguments)]
454async fn parse_and_group_files(
455    files: &[PathBuf],
456    config: &lintel_config::Config,
457    config_dir: &Path,
458    compiled_catalogs: &[CompiledCatalog],
459    errors: &mut Vec<LintError>,
460) -> BTreeMap<String, Vec<ParsedFile>> {
461    // Read all files concurrently using tokio async I/O, with a semaphore
462    // to avoid exhausting file descriptors on large directories.
463    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
464    let mut read_set = tokio::task::JoinSet::new();
465    for path in files {
466        let path = path.clone();
467        let sem = semaphore.clone();
468        read_set.spawn(async move {
469            let _permit = sem.acquire().await.expect("semaphore closed");
470            let result = tokio::fs::read_to_string(&path).await;
471            (path, result)
472        });
473    }
474
475    let mut file_contents = Vec::with_capacity(files.len());
476    while let Some(result) = read_set.join_next().await {
477        match result {
478            Ok(item) => file_contents.push(item),
479            Err(e) => tracing::warn!("file read task panicked: {e}"),
480        }
481    }
482
483    // Process files: parse content and resolve schema URIs.
484    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
485    for (path, content_result) in file_contents {
486        let content = match content_result {
487            Ok(c) => c,
488            Err(e) => {
489                errors.push(LintError::Io {
490                    path: path.display().to_string(),
491                    message: format!("failed to read: {e}"),
492                });
493                continue;
494            }
495        };
496        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
497        for result in results {
498            match result {
499                FileResult::Parsed { schema_uri, parsed } => {
500                    schema_groups.entry(schema_uri).or_default().push(parsed);
501                }
502                FileResult::Error(e) => errors.push(e),
503                FileResult::Skip => {}
504            }
505        }
506    }
507
508    schema_groups
509}
510
511// ---------------------------------------------------------------------------
512// Phase 2: Schema fetching, compilation, and instance validation
513// ---------------------------------------------------------------------------
514
515/// Fetch a schema by URI, returning its parsed JSON and cache status.
516///
517/// For remote URIs, checks the prefetched map first; for local URIs, reads
518/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
519#[allow(clippy::too_many_arguments)]
520async fn fetch_schema_from_prefetched(
521    schema_uri: &str,
522    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
523    local_cache: &mut HashMap<String, Value>,
524    group: &[ParsedFile],
525    errors: &mut Vec<LintError>,
526    checked: &mut Vec<CheckedFile>,
527    on_check: &mut impl FnMut(&CheckedFile),
528) -> Option<(Value, Option<CacheStatus>)> {
529    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
530
531    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
532        match prefetched.get(schema_uri) {
533            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
534            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
535            None => Err(format!("schema not prefetched: {schema_uri}")),
536        }
537    } else if let Some(cached) = local_cache.get(schema_uri) {
538        Ok((cached.clone(), None))
539    } else {
540        tokio::fs::read_to_string(schema_uri)
541            .await
542            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
543            .and_then(|content| {
544                serde_json::from_str::<Value>(&content)
545                    .map(|v| {
546                        local_cache.insert(schema_uri.to_string(), v.clone());
547                        (v, None)
548                    })
549                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
550            })
551    };
552
553    match result {
554        Ok(value) => Some(value),
555        Err(message) => {
556            report_group_error(
557                |path| LintError::SchemaFetch {
558                    path: path.to_string(),
559                    message: message.clone(),
560                },
561                schema_uri,
562                None,
563                group,
564                errors,
565                checked,
566                on_check,
567            );
568            None
569        }
570    }
571}
572
573/// Report the same error for every file in a schema group.
574#[allow(clippy::too_many_arguments)]
575fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
576    make_error: impl Fn(&str) -> LintError,
577    schema_uri: &str,
578    cache_status: Option<CacheStatus>,
579    group: &[P],
580    errors: &mut Vec<LintError>,
581    checked: &mut Vec<CheckedFile>,
582    on_check: &mut impl FnMut(&CheckedFile),
583) {
584    for item in group {
585        let pf = item.borrow();
586        let cf = CheckedFile {
587            path: pf.path.clone(),
588            schema: schema_uri.to_string(),
589            cache_status,
590            validation_cache_status: None,
591        };
592        on_check(&cf);
593        checked.push(cf);
594        errors.push(make_error(&pf.path));
595    }
596}
597
598/// Mark every file in a group as checked (no errors).
599#[allow(clippy::too_many_arguments)]
600fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
601    schema_uri: &str,
602    cache_status: Option<CacheStatus>,
603    validation_cache_status: Option<ValidationCacheStatus>,
604    group: &[P],
605    checked: &mut Vec<CheckedFile>,
606    on_check: &mut impl FnMut(&CheckedFile),
607) {
608    for item in group {
609        let pf = item.borrow();
610        let cf = CheckedFile {
611            path: pf.path.clone(),
612            schema: schema_uri.to_string(),
613            cache_status,
614            validation_cache_status,
615        };
616        on_check(&cf);
617        checked.push(cf);
618    }
619}
620
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// The marker is searched from the end of the message. The genuine marker is
/// always the last occurrence, whereas the dumped value may itself contain
/// the same text; matching the first occurrence would leave part of the
/// value in the result.
///
/// All other messages are returned unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // Leading part of MARKER that is dropped so the result starts at "not valid...".
    const LEAD_IN: &str = " is ";

    match msg.rfind(MARKER) {
        // MARKER begins with ASCII, so `pos + LEAD_IN.len()` is a char boundary.
        Some(pos) => msg[pos + LEAD_IN.len()..].to_string(),
        None => msg,
    }
}
637
638/// Convert [`ValidationError`]s into [`LintError::Validation`] diagnostics.
639fn push_validation_errors(
640    pf: &ParsedFile,
641    schema_url: &str,
642    validation_errors: &[ValidationError],
643    errors: &mut Vec<LintError>,
644) {
645    for ve in validation_errors {
646        let span = find_instance_path_span(&pf.content, &ve.instance_path);
647        let instance_path = if ve.instance_path.is_empty() {
648            DEFAULT_LABEL.to_string()
649        } else {
650            ve.instance_path.clone()
651        };
652        let label = format_label(&instance_path, &ve.schema_path);
653        let source_span: miette::SourceSpan = span.into();
654        errors.push(LintError::Validation {
655            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
656            span: source_span,
657            schema_span: source_span,
658            path: pf.path.clone(),
659            instance_path,
660            label,
661            message: ve.message.clone(),
662            schema_url: schema_url.to_string(),
663            schema_path: ve.schema_path.clone(),
664        });
665    }
666}
667
668/// Validate all files in a group against an already-compiled validator and store
669/// results in the validation cache.
670#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
671#[allow(clippy::too_many_arguments)]
672async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
673    validator: &jsonschema::Validator,
674    schema_uri: &str,
675    schema_hash: &str,
676    validate_formats: bool,
677    cache_status: Option<CacheStatus>,
678    group: &[P],
679    vcache: &lintel_validation_cache::ValidationCache,
680    errors: &mut Vec<LintError>,
681    checked: &mut Vec<CheckedFile>,
682    on_check: &mut impl FnMut(&CheckedFile),
683) {
684    for item in group {
685        let pf = item.borrow();
686        let file_errors: Vec<ValidationError> = validator
687            .iter_errors(&pf.instance)
688            .map(|error| ValidationError {
689                instance_path: error.instance_path().to_string(),
690                message: clean_error_message(error.to_string()),
691                schema_path: error.schema_path().to_string(),
692            })
693            .collect();
694
695        vcache
696            .store(
697                &lintel_validation_cache::CacheKey {
698                    file_content: &pf.content,
699                    schema_hash,
700                    validate_formats,
701                },
702                &file_errors,
703            )
704            .await;
705        push_validation_errors(pf, schema_uri, &file_errors, errors);
706
707        let cf = CheckedFile {
708            path: pf.path.clone(),
709            schema: schema_uri.to_string(),
710            cache_status,
711            validation_cache_status: Some(ValidationCacheStatus::Miss),
712        };
713        on_check(&cf);
714        checked.push(cf);
715    }
716}
717
718// ---------------------------------------------------------------------------
719// Public API
720// ---------------------------------------------------------------------------
721
722/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
723///
724/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
725pub async fn fetch_compiled_catalogs(
726    retriever: &SchemaCache,
727    config: &lintel_config::Config,
728    no_catalog: bool,
729) -> Vec<CompiledCatalog> {
730    let mut compiled_catalogs = Vec::new();
731
732    if !no_catalog {
733        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
734
735        // Catalogs are fetched concurrently but sorted by priority so that
736        // the Lintel catalog wins over custom registries, which win over
737        // SchemaStore.  The `order` field encodes this precedence.
738        #[allow(clippy::items_after_statements)]
739        type CatalogResult = (
740            usize, // priority (lower = higher precedence)
741            String,
742            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
743        );
744        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
745
746        // Custom registries from lintel.toml (highest precedence among catalogs)
747        for (i, registry_url) in config.registries.iter().enumerate() {
748            let r = retriever.clone();
749            let url = registry_url.clone();
750            let label = format!("registry {url}");
751            catalog_tasks.spawn(async move {
752                let result = registry::fetch(&r, &url)
753                    .await
754                    .map(|cat| CompiledCatalog::compile(&cat));
755                (i, label, result)
756            });
757        }
758
759        // Lintel catalog
760        let lintel_order = config.registries.len();
761        if !config.no_default_catalog {
762            let r = retriever.clone();
763            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
764            catalog_tasks.spawn(async move {
765                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
766                    .await
767                    .map(|cat| CompiledCatalog::compile(&cat));
768                (lintel_order, label, result)
769            });
770        }
771
772        // SchemaStore catalog (lowest precedence)
773        let schemastore_order = config.registries.len() + 1;
774        let r = retriever.clone();
775        catalog_tasks.spawn(async move {
776            let result = catalog::fetch_catalog(&r)
777                .await
778                .map(|cat| CompiledCatalog::compile(&cat));
779            (schemastore_order, "SchemaStore catalog".to_string(), result)
780        });
781
782        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
783        while let Some(result) = catalog_tasks.join_next().await {
784            match result {
785                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
786                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
787                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
788            }
789        }
790        results.sort_by_key(|(order, _)| *order);
791        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));
792
793        drop(catalog_span);
794    }
795
796    compiled_catalogs
797}
798
799/// # Errors
800///
801/// Returns an error if file collection or schema validation encounters an I/O error.
802pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
803    run_with(args, None, |_| {}).await
804}
805
806/// Like [`run`], but calls `on_check` each time a file is checked, allowing
807/// callers to stream progress (e.g. verbose output) as files are processed.
808///
809/// # Errors
810///
811/// Returns an error if file collection or schema validation encounters an I/O error.
812#[tracing::instrument(skip_all, name = "validate")]
813#[allow(clippy::too_many_lines)]
814pub async fn run_with(
815    args: &ValidateArgs,
816    cache: Option<SchemaCache>,
817    mut on_check: impl FnMut(&CheckedFile),
818) -> Result<ValidateResult> {
819    let retriever = if let Some(c) = cache {
820        c
821    } else {
822        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
823        if let Some(dir) = &args.cache_dir {
824            let path = PathBuf::from(dir);
825            let _ = fs::create_dir_all(&path);
826            builder = builder.cache_dir(path);
827        }
828        if let Some(ttl) = args.schema_cache_ttl {
829            builder = builder.ttl(ttl);
830        }
831        builder.build()
832    };
833
834    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
835    let files = collect_files(&args.globs, &args.exclude)?;
836    tracing::info!(file_count = files.len(), "collected files");
837
838    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
839
840    let mut errors: Vec<LintError> = Vec::new();
841    let mut checked: Vec<CheckedFile> = Vec::new();
842
843    // Phase 1: Parse files and resolve schema URIs
844    let schema_groups = parse_and_group_files(
845        &files,
846        &config,
847        &config_dir,
848        &compiled_catalogs,
849        &mut errors,
850    )
851    .await;
852    tracing::info!(
853        schema_count = schema_groups.len(),
854        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
855        "grouped files by schema"
856    );
857
858    // Create validation cache
859    let vcache = lintel_validation_cache::ValidationCache::new(
860        lintel_validation_cache::ensure_cache_dir(),
861        args.force_validation,
862    );
863
864    // Prefetch all remote schemas in parallel
865    let remote_uris: Vec<&String> = schema_groups
866        .keys()
867        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
868        .collect();
869
870    let prefetched = {
871        let _prefetch_span =
872            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
873
874        let mut schema_tasks = tokio::task::JoinSet::new();
875        for uri in remote_uris {
876            let r = retriever.clone();
877            let u = uri.clone();
878            schema_tasks.spawn(async move {
879                let result = r.fetch(&u).await;
880                (u, result)
881            });
882        }
883
884        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
885        while let Some(result) = schema_tasks.join_next().await {
886            match result {
887                Ok((uri, fetch_result)) => {
888                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
889                }
890                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
891            }
892        }
893
894        prefetched
895    };
896
897    // Phase 2: Compile each schema once and validate all matching files
898    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
899    let mut fetch_time = core::time::Duration::ZERO;
900    let mut hash_time = core::time::Duration::ZERO;
901    let mut vcache_time = core::time::Duration::ZERO;
902    let mut compile_time = core::time::Duration::ZERO;
903    let mut validate_time = core::time::Duration::ZERO;
904
905    for (schema_uri, group) in &schema_groups {
906        let _group_span = tracing::debug_span!(
907            "schema_group",
908            schema = schema_uri.as_str(),
909            files = group.len(),
910        )
911        .entered();
912
913        // If ANY file in the group matches a `validate_formats = false` override,
914        // disable format validation for the whole group (they share one compiled validator).
915        let validate_formats = group.iter().all(|pf| {
916            config
917                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
918        });
919
920        // Remote schemas were prefetched in parallel above; local schemas are
921        // read from disk here (with in-memory caching).
922        let t = std::time::Instant::now();
923        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
924            schema_uri,
925            &prefetched,
926            &mut local_schema_cache,
927            group,
928            &mut errors,
929            &mut checked,
930            &mut on_check,
931        )
932        .await
933        else {
934            fetch_time += t.elapsed();
935            continue;
936        };
937        fetch_time += t.elapsed();
938
939        // Pre-compute schema hash once for the entire group.
940        let t = std::time::Instant::now();
941        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
942        hash_time += t.elapsed();
943
944        // Split the group into validation cache hits and misses.
945        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
946
947        let t = std::time::Instant::now();
948        for pf in group {
949            let (cached, vcache_status) = vcache
950                .lookup(&lintel_validation_cache::CacheKey {
951                    file_content: &pf.content,
952                    schema_hash: &schema_hash,
953                    validate_formats,
954                })
955                .await;
956
957            if let Some(cached_errors) = cached {
958                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
959                let cf = CheckedFile {
960                    path: pf.path.clone(),
961                    schema: schema_uri.clone(),
962                    cache_status,
963                    validation_cache_status: Some(vcache_status),
964                };
965                on_check(&cf);
966                checked.push(cf);
967            } else {
968                cache_misses.push(pf);
969            }
970        }
971        vcache_time += t.elapsed();
972
973        tracing::debug!(
974            cache_hits = group.len() - cache_misses.len(),
975            cache_misses = cache_misses.len(),
976            "validation cache"
977        );
978
979        // If all files hit the validation cache, skip schema compilation entirely.
980        if cache_misses.is_empty() {
981            continue;
982        }
983
984        // Compile the schema for cache misses.
985        let t = std::time::Instant::now();
986        let validator = {
987            // Set base URI so relative $ref values (e.g. "./rule.json") resolve
988            // correctly. Remote schemas use the HTTP URI directly; local schemas
989            // get a file:// URI derived from the canonical absolute path.
990            let is_remote_schema =
991                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
992            let local_retriever = LocalRetriever {
993                http: retriever.clone(),
994            };
995            let opts = jsonschema::async_options()
996                .with_retriever(local_retriever)
997                .should_validate_formats(validate_formats);
998            let base_uri = if is_remote_schema {
999                Some(schema_uri.clone())
1000            } else {
1001                std::fs::canonicalize(schema_uri)
1002                    .ok()
1003                    .map(|p| format!("file://{}", p.display()))
1004            };
1005            let opts = if let Some(uri) = base_uri {
1006                opts.with_base_uri(uri)
1007            } else {
1008                opts
1009            };
1010            match opts.build(&schema_value).await {
1011                Ok(v) => v,
1012                Err(e) => {
1013                    compile_time += t.elapsed();
1014                    // When format validation is disabled and the compilation error
1015                    // is a uri-reference issue (e.g. Rust-style $ref paths in
1016                    // vector.json), skip validation silently.
1017                    if !validate_formats && e.to_string().contains("uri-reference") {
1018                        mark_group_checked(
1019                            schema_uri,
1020                            cache_status,
1021                            Some(ValidationCacheStatus::Miss),
1022                            &cache_misses,
1023                            &mut checked,
1024                            &mut on_check,
1025                        );
1026                        continue;
1027                    }
1028                    let msg = format!("failed to compile schema: {e}");
1029                    report_group_error(
1030                        |path| LintError::SchemaCompile {
1031                            path: path.to_string(),
1032                            message: msg.clone(),
1033                        },
1034                        schema_uri,
1035                        cache_status,
1036                        &cache_misses,
1037                        &mut errors,
1038                        &mut checked,
1039                        &mut on_check,
1040                    );
1041                    continue;
1042                }
1043            }
1044        };
1045        compile_time += t.elapsed();
1046
1047        let t = std::time::Instant::now();
1048        validate_group(
1049            &validator,
1050            schema_uri,
1051            &schema_hash,
1052            validate_formats,
1053            cache_status,
1054            &cache_misses,
1055            &vcache,
1056            &mut errors,
1057            &mut checked,
1058            &mut on_check,
1059        )
1060        .await;
1061        validate_time += t.elapsed();
1062    }
1063
1064    #[allow(clippy::cast_possible_truncation)]
1065    {
1066        tracing::info!(
1067            fetch_ms = fetch_time.as_millis() as u64,
1068            hash_ms = hash_time.as_millis() as u64,
1069            vcache_ms = vcache_time.as_millis() as u64,
1070            compile_ms = compile_time.as_millis() as u64,
1071            validate_ms = validate_time.as_millis() as u64,
1072            "phase2 breakdown"
1073        );
1074    }
1075
1076    // Sort errors for deterministic output (by path, then by span offset)
1077    errors.sort_by(|a, b| {
1078        a.path()
1079            .cmp(b.path())
1080            .then_with(|| a.offset().cmp(&b.offset()))
1081    });
1082
1083    Ok(ValidateResult { errors, checked })
1084}
1085
1086#[cfg(test)]
1087mod tests {
1088    use super::*;
1089    use lintel_schema_cache::SchemaCache;
1090    use std::path::Path;
1091
1092    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1093        let cache = SchemaCache::memory();
1094        for (uri, body) in entries {
1095            cache.insert(
1096                uri,
1097                serde_json::from_str(body).expect("test mock: invalid JSON"),
1098            );
1099        }
1100        cache
1101    }
1102
1103    fn testdata() -> PathBuf {
1104        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1105    }
1106
1107    /// Build glob patterns that scan one or more testdata directories for all supported file types.
1108    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1109        dirs.iter()
1110            .flat_map(|dir| {
1111                let base = testdata().join(dir);
1112                vec![
1113                    base.join("*.json").to_string_lossy().to_string(),
1114                    base.join("*.yaml").to_string_lossy().to_string(),
1115                    base.join("*.yml").to_string_lossy().to_string(),
1116                    base.join("*.json5").to_string_lossy().to_string(),
1117                    base.join("*.jsonc").to_string_lossy().to_string(),
1118                    base.join("*.toml").to_string_lossy().to_string(),
1119                ]
1120            })
1121            .collect()
1122    }
1123
1124    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1125        ValidateArgs {
1126            globs: scenario_globs(dirs),
1127            exclude: vec![],
1128            cache_dir: None,
1129            force_schema_fetch: true,
1130            force_validation: true,
1131            no_catalog: true,
1132            config_dir: None,
1133            schema_cache_ttl: None,
1134        }
1135    }
1136
1137    const SCHEMA: &str =
1138        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1139
1140    fn schema_mock() -> SchemaCache {
1141        mock(&[("https://example.com/schema.json", SCHEMA)])
1142    }
1143
1144    // --- Directory scanning tests ---
1145
1146    #[tokio::test]
1147    async fn no_matching_files() -> anyhow::Result<()> {
1148        let tmp = tempfile::tempdir()?;
1149        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1150        let c = ValidateArgs {
1151            globs: vec![pattern],
1152            exclude: vec![],
1153            cache_dir: None,
1154            force_schema_fetch: true,
1155            force_validation: true,
1156            no_catalog: true,
1157            config_dir: None,
1158            schema_cache_ttl: None,
1159        };
1160        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1161        assert!(!result.has_errors());
1162        Ok(())
1163    }
1164
1165    #[tokio::test]
1166    async fn dir_all_valid() -> anyhow::Result<()> {
1167        let c = args_for_dirs(&["positive_tests"]);
1168        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1169        assert!(!result.has_errors());
1170        Ok(())
1171    }
1172
1173    #[tokio::test]
1174    async fn dir_all_invalid() -> anyhow::Result<()> {
1175        let c = args_for_dirs(&["negative_tests"]);
1176        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1177        assert!(result.has_errors());
1178        Ok(())
1179    }
1180
1181    #[tokio::test]
1182    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1183        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1184        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1185        assert!(result.has_errors());
1186        Ok(())
1187    }
1188
1189    #[tokio::test]
1190    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1191        let c = args_for_dirs(&["no_schema"]);
1192        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1193        assert!(!result.has_errors());
1194        Ok(())
1195    }
1196
1197    #[tokio::test]
1198    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1199        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1200        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1201        assert!(!result.has_errors());
1202        Ok(())
1203    }
1204
1205    // --- Directory as positional arg ---
1206
1207    #[tokio::test]
1208    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1209        let dir = testdata().join("positive_tests");
1210        let c = ValidateArgs {
1211            globs: vec![dir.to_string_lossy().to_string()],
1212            exclude: vec![],
1213            cache_dir: None,
1214            force_schema_fetch: true,
1215            force_validation: true,
1216            no_catalog: true,
1217            config_dir: None,
1218            schema_cache_ttl: None,
1219        };
1220        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1221        assert!(!result.has_errors());
1222        assert!(result.files_checked() > 0);
1223        Ok(())
1224    }
1225
1226    #[tokio::test]
1227    async fn multiple_directory_args() -> anyhow::Result<()> {
1228        let pos_dir = testdata().join("positive_tests");
1229        let no_schema_dir = testdata().join("no_schema");
1230        let c = ValidateArgs {
1231            globs: vec![
1232                pos_dir.to_string_lossy().to_string(),
1233                no_schema_dir.to_string_lossy().to_string(),
1234            ],
1235            exclude: vec![],
1236            cache_dir: None,
1237            force_schema_fetch: true,
1238            force_validation: true,
1239            no_catalog: true,
1240            config_dir: None,
1241            schema_cache_ttl: None,
1242        };
1243        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1244        assert!(!result.has_errors());
1245        Ok(())
1246    }
1247
1248    #[tokio::test]
1249    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1250        let dir = testdata().join("positive_tests");
1251        let glob_pattern = testdata()
1252            .join("no_schema")
1253            .join("*.json")
1254            .to_string_lossy()
1255            .to_string();
1256        let c = ValidateArgs {
1257            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1258            exclude: vec![],
1259            cache_dir: None,
1260            force_schema_fetch: true,
1261            force_validation: true,
1262            no_catalog: true,
1263            config_dir: None,
1264            schema_cache_ttl: None,
1265        };
1266        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1267        assert!(!result.has_errors());
1268        Ok(())
1269    }
1270
1271    #[tokio::test]
1272    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1273        let base = testdata().join("malformed");
1274        let c = ValidateArgs {
1275            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1276            exclude: vec![],
1277            cache_dir: None,
1278            force_schema_fetch: true,
1279            force_validation: true,
1280            no_catalog: true,
1281            config_dir: None,
1282            schema_cache_ttl: None,
1283        };
1284        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1285        assert!(result.has_errors());
1286        Ok(())
1287    }
1288
1289    #[tokio::test]
1290    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1291        let base = testdata().join("malformed");
1292        let c = ValidateArgs {
1293            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1294            exclude: vec![],
1295            cache_dir: None,
1296            force_schema_fetch: true,
1297            force_validation: true,
1298            no_catalog: true,
1299            config_dir: None,
1300            schema_cache_ttl: None,
1301        };
1302        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1303        assert!(result.has_errors());
1304        Ok(())
1305    }
1306
1307    // --- Exclude filter ---
1308
1309    #[tokio::test]
1310    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1311        let base = testdata().join("negative_tests");
1312        let c = ValidateArgs {
1313            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1314            exclude: vec![
1315                base.join("missing_name.json").to_string_lossy().to_string(),
1316                base.join("missing_name.toml").to_string_lossy().to_string(),
1317                base.join("missing_name.yaml").to_string_lossy().to_string(),
1318            ],
1319            cache_dir: None,
1320            force_schema_fetch: true,
1321            force_validation: true,
1322            no_catalog: true,
1323            config_dir: None,
1324            schema_cache_ttl: None,
1325        };
1326        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1327        assert!(!result.has_errors());
1328        Ok(())
1329    }
1330
1331    // --- Cache options ---
1332
1333    #[tokio::test]
1334    async fn custom_cache_dir() -> anyhow::Result<()> {
1335        let c = ValidateArgs {
1336            globs: scenario_globs(&["positive_tests"]),
1337            exclude: vec![],
1338            cache_dir: None,
1339            force_schema_fetch: true,
1340            force_validation: true,
1341            no_catalog: true,
1342            config_dir: None,
1343            schema_cache_ttl: None,
1344        };
1345        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1346        assert!(!result.has_errors());
1347        Ok(())
1348    }
1349
1350    // --- Local schema ---
1351
1352    #[tokio::test]
1353    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1354        let tmp = tempfile::tempdir()?;
1355        let schema_path = tmp.path().join("schema.json");
1356        fs::write(&schema_path, SCHEMA)?;
1357
1358        let f = tmp.path().join("valid.json");
1359        fs::write(
1360            &f,
1361            format!(
1362                r#"{{"$schema":"{}","name":"hello"}}"#,
1363                schema_path.to_string_lossy()
1364            ),
1365        )?;
1366
1367        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1368        let c = ValidateArgs {
1369            globs: vec![pattern],
1370            exclude: vec![],
1371            cache_dir: None,
1372            force_schema_fetch: true,
1373            force_validation: true,
1374            no_catalog: true,
1375            config_dir: None,
1376            schema_cache_ttl: None,
1377        };
1378        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1379        assert!(!result.has_errors());
1380        Ok(())
1381    }
1382
1383    #[tokio::test]
1384    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1385        let tmp = tempfile::tempdir()?;
1386        let schema_path = tmp.path().join("schema.json");
1387        fs::write(&schema_path, SCHEMA)?;
1388
1389        let f = tmp.path().join("valid.yaml");
1390        fs::write(
1391            &f,
1392            format!(
1393                "# yaml-language-server: $schema={}\nname: hello\n",
1394                schema_path.to_string_lossy()
1395            ),
1396        )?;
1397
1398        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1399        let c = ValidateArgs {
1400            globs: vec![pattern],
1401            exclude: vec![],
1402            cache_dir: None,
1403            force_schema_fetch: true,
1404            force_validation: true,
1405            no_catalog: true,
1406            config_dir: None,
1407            schema_cache_ttl: None,
1408        };
1409        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1410        assert!(!result.has_errors());
1411        Ok(())
1412    }
1413
1414    #[tokio::test]
1415    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1416        let tmp = tempfile::tempdir()?;
1417        let f = tmp.path().join("ref.json");
1418        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1419
1420        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1421        let c = ValidateArgs {
1422            globs: vec![pattern],
1423            exclude: vec![],
1424            cache_dir: None,
1425            force_schema_fetch: true,
1426            force_validation: true,
1427            no_catalog: true,
1428            config_dir: None,
1429            schema_cache_ttl: None,
1430        };
1431        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1432        assert!(result.has_errors());
1433        Ok(())
1434    }
1435
1436    // --- JSON5 / JSONC tests ---
1437
1438    #[tokio::test]
1439    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1440        let tmp = tempfile::tempdir()?;
1441        let schema_path = tmp.path().join("schema.json");
1442        fs::write(&schema_path, SCHEMA)?;
1443
1444        let f = tmp.path().join("config.json5");
1445        fs::write(
1446            &f,
1447            format!(
1448                r#"{{
1449  // JSON5 comment
1450  "$schema": "{}",
1451  name: "hello",
1452}}"#,
1453                schema_path.to_string_lossy()
1454            ),
1455        )?;
1456
1457        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1458        let c = ValidateArgs {
1459            globs: vec![pattern],
1460            exclude: vec![],
1461            cache_dir: None,
1462            force_schema_fetch: true,
1463            force_validation: true,
1464            no_catalog: true,
1465            config_dir: None,
1466            schema_cache_ttl: None,
1467        };
1468        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1469        assert!(!result.has_errors());
1470        Ok(())
1471    }
1472
1473    #[tokio::test]
1474    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1475        let tmp = tempfile::tempdir()?;
1476        let schema_path = tmp.path().join("schema.json");
1477        fs::write(&schema_path, SCHEMA)?;
1478
1479        let f = tmp.path().join("config.jsonc");
1480        fs::write(
1481            &f,
1482            format!(
1483                r#"{{
1484  /* JSONC comment */
1485  "$schema": "{}",
1486  "name": "hello"
1487}}"#,
1488                schema_path.to_string_lossy()
1489            ),
1490        )?;
1491
1492        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1493        let c = ValidateArgs {
1494            globs: vec![pattern],
1495            exclude: vec![],
1496            cache_dir: None,
1497            force_schema_fetch: true,
1498            force_validation: true,
1499            no_catalog: true,
1500            config_dir: None,
1501            schema_cache_ttl: None,
1502        };
1503        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1504        assert!(!result.has_errors());
1505        Ok(())
1506    }
1507
1508    // --- Catalog-based schema matching ---
1509
    /// Minimal stand-in for the GitHub workflow schema used by the catalog tests:
    /// an object with an optional string `name`, an unconstrained `on`, an object
    /// `jobs`, and both `on` and `jobs` required.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1519
1520    fn gh_catalog_json() -> String {
1521        r#"{"version":1,"schemas":[{
1522            "name": "GitHub Workflow",
1523            "description": "GitHub Actions workflow",
1524            "url": "https://www.schemastore.org/github-workflow.json",
1525            "fileMatch": [
1526                "**/.github/workflows/*.yml",
1527                "**/.github/workflows/*.yaml"
1528            ]
1529        }]}"#
1530            .to_string()
1531    }
1532
1533    #[tokio::test]
1534    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1535        let tmp = tempfile::tempdir()?;
1536        let cache_tmp = tempfile::tempdir()?;
1537        let wf_dir = tmp.path().join(".github/workflows");
1538        fs::create_dir_all(&wf_dir)?;
1539        fs::write(
1540            wf_dir.join("ci.yml"),
1541            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1542        )?;
1543
1544        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1545        let client = mock(&[
1546            (
1547                "https://www.schemastore.org/api/json/catalog.json",
1548                &gh_catalog_json(),
1549            ),
1550            (
1551                "https://www.schemastore.org/github-workflow.json",
1552                GH_WORKFLOW_SCHEMA,
1553            ),
1554        ]);
1555        let c = ValidateArgs {
1556            globs: vec![pattern],
1557            exclude: vec![],
1558            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1559            force_schema_fetch: true,
1560            force_validation: true,
1561            no_catalog: false,
1562            config_dir: None,
1563            schema_cache_ttl: None,
1564        };
1565        let result = run_with(&c, Some(client), |_| {}).await?;
1566        assert!(!result.has_errors());
1567        Ok(())
1568    }
1569
1570    #[tokio::test]
1571    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1572        let tmp = tempfile::tempdir()?;
1573        let cache_tmp = tempfile::tempdir()?;
1574        let wf_dir = tmp.path().join(".github/workflows");
1575        fs::create_dir_all(&wf_dir)?;
1576        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1577
1578        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1579        let client = mock(&[
1580            (
1581                "https://www.schemastore.org/api/json/catalog.json",
1582                &gh_catalog_json(),
1583            ),
1584            (
1585                "https://www.schemastore.org/github-workflow.json",
1586                GH_WORKFLOW_SCHEMA,
1587            ),
1588        ]);
1589        let c = ValidateArgs {
1590            globs: vec![pattern],
1591            exclude: vec![],
1592            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1593            force_schema_fetch: true,
1594            force_validation: true,
1595            no_catalog: false,
1596            config_dir: None,
1597            schema_cache_ttl: None,
1598        };
1599        let result = run_with(&c, Some(client), |_| {}).await?;
1600        assert!(result.has_errors());
1601        Ok(())
1602    }
1603
1604    #[tokio::test]
1605    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1606        let tmp = tempfile::tempdir()?;
1607        let cache_tmp = tempfile::tempdir()?;
1608        let wf_dir = tmp.path().join(".github/workflows");
1609        fs::create_dir_all(&wf_dir)?;
1610        fs::write(
1611            wf_dir.join("ci.yml"),
1612            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1613        )?;
1614
1615        let client = mock(&[
1616            (
1617                "https://www.schemastore.org/api/json/catalog.json",
1618                &gh_catalog_json(),
1619            ),
1620            (
1621                "https://www.schemastore.org/github-workflow.json",
1622                GH_WORKFLOW_SCHEMA,
1623            ),
1624        ]);
1625        let c = ValidateArgs {
1626            globs: vec![],
1627            exclude: vec![],
1628            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1629            force_schema_fetch: true,
1630            force_validation: true,
1631            no_catalog: false,
1632            config_dir: None,
1633            schema_cache_ttl: None,
1634        };
1635
1636        let orig_dir = std::env::current_dir()?;
1637        std::env::set_current_dir(tmp.path())?;
1638        let result = run_with(&c, Some(client), |_| {}).await?;
1639        std::env::set_current_dir(orig_dir)?;
1640
1641        assert!(!result.has_errors());
1642        Ok(())
1643    }
1644
1645    // --- TOML tests ---
1646
1647    #[tokio::test]
1648    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1649        let tmp = tempfile::tempdir()?;
1650        let schema_path = tmp.path().join("schema.json");
1651        fs::write(&schema_path, SCHEMA)?;
1652
1653        let f = tmp.path().join("config.toml");
1654        fs::write(
1655            &f,
1656            format!(
1657                "# :schema {}\nname = \"hello\"\n",
1658                schema_path.to_string_lossy()
1659            ),
1660        )?;
1661
1662        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1663        let c = ValidateArgs {
1664            globs: vec![pattern],
1665            exclude: vec![],
1666            cache_dir: None,
1667            force_schema_fetch: true,
1668            force_validation: true,
1669            no_catalog: true,
1670            config_dir: None,
1671            schema_cache_ttl: None,
1672        };
1673        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1674        assert!(!result.has_errors());
1675        Ok(())
1676    }
1677
1678    // --- Rewrite rules + // resolution ---
1679
1680    #[tokio::test]
1681    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1682        let tmp = tempfile::tempdir()?;
1683
1684        let schemas_dir = tmp.path().join("schemas");
1685        fs::create_dir_all(&schemas_dir)?;
1686        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1687
1688        fs::write(
1689            tmp.path().join("lintel.toml"),
1690            r#"
1691[rewrite]
1692"http://localhost:9000/" = "//schemas/"
1693"#,
1694        )?;
1695
1696        let f = tmp.path().join("config.json");
1697        fs::write(
1698            &f,
1699            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1700        )?;
1701
1702        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1703        let c = ValidateArgs {
1704            globs: vec![pattern],
1705            exclude: vec![],
1706            cache_dir: None,
1707            force_schema_fetch: true,
1708            force_validation: true,
1709            no_catalog: true,
1710            config_dir: Some(tmp.path().to_path_buf()),
1711            schema_cache_ttl: None,
1712        };
1713
1714        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1715        assert!(!result.has_errors());
1716        assert_eq!(result.files_checked(), 1);
1717        Ok(())
1718    }
1719
1720    #[tokio::test]
1721    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1722        let tmp = tempfile::tempdir()?;
1723
1724        let schemas_dir = tmp.path().join("schemas");
1725        fs::create_dir_all(&schemas_dir)?;
1726        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1727
1728        fs::write(tmp.path().join("lintel.toml"), "")?;
1729
1730        let sub = tmp.path().join("deeply/nested");
1731        fs::create_dir_all(&sub)?;
1732        let f = sub.join("config.json");
1733        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1734
1735        let pattern = sub.join("*.json").to_string_lossy().to_string();
1736        let c = ValidateArgs {
1737            globs: vec![pattern],
1738            exclude: vec![],
1739            cache_dir: None,
1740            force_schema_fetch: true,
1741            force_validation: true,
1742            no_catalog: true,
1743            config_dir: Some(tmp.path().to_path_buf()),
1744            schema_cache_ttl: None,
1745        };
1746
1747        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1748        assert!(!result.has_errors());
1749        Ok(())
1750    }
1751
1752    // --- Format validation override ---
1753
1754    const FORMAT_SCHEMA: &str = r#"{
1755        "type": "object",
1756        "properties": {
1757            "link": { "type": "string", "format": "uri-reference" }
1758        }
1759    }"#;
1760
1761    #[tokio::test]
1762    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1763        let tmp = tempfile::tempdir()?;
1764        let schema_path = tmp.path().join("schema.json");
1765        fs::write(&schema_path, FORMAT_SCHEMA)?;
1766
1767        let f = tmp.path().join("data.json");
1768        fs::write(
1769            &f,
1770            format!(
1771                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1772                schema_path.to_string_lossy()
1773            ),
1774        )?;
1775
1776        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1777        let c = ValidateArgs {
1778            globs: vec![pattern],
1779            exclude: vec![],
1780            cache_dir: None,
1781            force_schema_fetch: true,
1782            force_validation: true,
1783            no_catalog: true,
1784            config_dir: Some(tmp.path().to_path_buf()),
1785            schema_cache_ttl: None,
1786        };
1787        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1788        assert!(
1789            result.has_errors(),
1790            "expected format error without override"
1791        );
1792        Ok(())
1793    }
1794
1795    #[tokio::test]
1796    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1797        let tmp = tempfile::tempdir()?;
1798        let schema_path = tmp.path().join("schema.json");
1799        fs::write(&schema_path, FORMAT_SCHEMA)?;
1800
1801        let f = tmp.path().join("data.json");
1802        fs::write(
1803            &f,
1804            format!(
1805                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1806                schema_path.to_string_lossy()
1807            ),
1808        )?;
1809
1810        // Use **/data.json to match the absolute path from the tempdir.
1811        fs::write(
1812            tmp.path().join("lintel.toml"),
1813            r#"
1814[[override]]
1815files = ["**/data.json"]
1816validate_formats = false
1817"#,
1818        )?;
1819
1820        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1821        let c = ValidateArgs {
1822            globs: vec![pattern],
1823            exclude: vec![],
1824            cache_dir: None,
1825            force_schema_fetch: true,
1826            force_validation: true,
1827            no_catalog: true,
1828            config_dir: Some(tmp.path().to_path_buf()),
1829            schema_cache_ttl: None,
1830        };
1831        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1832        assert!(
1833            !result.has_errors(),
1834            "expected no errors with validate_formats = false override"
1835        );
1836        Ok(())
1837    }
1838
1839    // --- Unrecognized extension handling ---
1840
1841    #[tokio::test]
1842    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1843        let tmp = tempfile::tempdir()?;
1844        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1845
1846        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1847        let c = ValidateArgs {
1848            globs: vec![pattern],
1849            exclude: vec![],
1850            cache_dir: None,
1851            force_schema_fetch: true,
1852            force_validation: true,
1853            no_catalog: true,
1854            config_dir: Some(tmp.path().to_path_buf()),
1855            schema_cache_ttl: None,
1856        };
1857        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1858        assert!(!result.has_errors());
1859        assert_eq!(result.files_checked(), 0);
1860        Ok(())
1861    }
1862
1863    #[tokio::test]
1864    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1865        let tmp = tempfile::tempdir()?;
1866        let cache_tmp = tempfile::tempdir()?;
1867        // File has .cfg extension (unrecognized) but content is valid JSON
1868        fs::write(
1869            tmp.path().join("myapp.cfg"),
1870            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1871        )?;
1872
1873        let catalog_json = r#"{"version":1,"schemas":[{
1874            "name": "MyApp Config",
1875            "description": "MyApp configuration",
1876            "url": "https://example.com/myapp.schema.json",
1877            "fileMatch": ["*.cfg"]
1878        }]}"#;
1879        let schema =
1880            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1881
1882        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1883        let client = mock(&[
1884            (
1885                "https://www.schemastore.org/api/json/catalog.json",
1886                catalog_json,
1887            ),
1888            ("https://example.com/myapp.schema.json", schema),
1889        ]);
1890        let c = ValidateArgs {
1891            globs: vec![pattern],
1892            exclude: vec![],
1893            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1894            force_schema_fetch: true,
1895            force_validation: true,
1896            no_catalog: false,
1897            config_dir: Some(tmp.path().to_path_buf()),
1898            schema_cache_ttl: None,
1899        };
1900        let result = run_with(&c, Some(client), |_| {}).await?;
1901        assert!(!result.has_errors());
1902        assert_eq!(result.files_checked(), 1);
1903        Ok(())
1904    }
1905
1906    #[tokio::test]
1907    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1908        let tmp = tempfile::tempdir()?;
1909        let cache_tmp = tempfile::tempdir()?;
1910        // File matches catalog but content isn't parseable by any format
1911        fs::write(
1912            tmp.path().join("myapp.cfg"),
1913            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1914        )?;
1915
1916        let catalog_json = r#"{"version":1,"schemas":[{
1917            "name": "MyApp Config",
1918            "description": "MyApp configuration",
1919            "url": "https://example.com/myapp.schema.json",
1920            "fileMatch": ["*.cfg"]
1921        }]}"#;
1922
1923        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1924        let client = mock(&[(
1925            "https://www.schemastore.org/api/json/catalog.json",
1926            catalog_json,
1927        )]);
1928        let c = ValidateArgs {
1929            globs: vec![pattern],
1930            exclude: vec![],
1931            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1932            force_schema_fetch: true,
1933            force_validation: true,
1934            no_catalog: false,
1935            config_dir: Some(tmp.path().to_path_buf()),
1936            schema_cache_ttl: None,
1937        };
1938        let result = run_with(&c, Some(client), |_| {}).await?;
1939        assert!(!result.has_errors());
1940        assert_eq!(result.files_checked(), 0);
1941        Ok(())
1942    }
1943
1944    #[tokio::test]
1945    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1946        let tmp = tempfile::tempdir()?;
1947        let cache_tmp = tempfile::tempdir()?;
1948        // File has .cfg extension, content is valid JSON but fails schema validation
1949        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1950
1951        let catalog_json = r#"{"version":1,"schemas":[{
1952            "name": "MyApp Config",
1953            "description": "MyApp configuration",
1954            "url": "https://example.com/myapp.schema.json",
1955            "fileMatch": ["*.cfg"]
1956        }]}"#;
1957        let schema =
1958            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1959
1960        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1961        let client = mock(&[
1962            (
1963                "https://www.schemastore.org/api/json/catalog.json",
1964                catalog_json,
1965            ),
1966            ("https://example.com/myapp.schema.json", schema),
1967        ]);
1968        let c = ValidateArgs {
1969            globs: vec![pattern],
1970            exclude: vec![],
1971            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1972            force_schema_fetch: true,
1973            force_validation: true,
1974            no_catalog: false,
1975            config_dir: Some(tmp.path().to_path_buf()),
1976            schema_cache_ttl: None,
1977        };
1978        let result = run_with(&c, Some(client), |_| {}).await?;
1979        assert!(result.has_errors());
1980        assert_eq!(result.files_checked(), 1);
1981        Ok(())
1982    }
1983
1984    // --- Validation cache ---
1985
1986    #[tokio::test]
1987    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1988        let tmp = tempfile::tempdir()?;
1989        let schema_path = tmp.path().join("schema.json");
1990        fs::write(&schema_path, SCHEMA)?;
1991
1992        let f = tmp.path().join("valid.json");
1993        fs::write(
1994            &f,
1995            format!(
1996                r#"{{"$schema":"{}","name":"hello"}}"#,
1997                schema_path.to_string_lossy()
1998            ),
1999        )?;
2000
2001        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2002
2003        // First run: force_validation = false so results get cached
2004        let c = ValidateArgs {
2005            globs: vec![pattern.clone()],
2006            exclude: vec![],
2007            cache_dir: None,
2008            force_schema_fetch: true,
2009            force_validation: false,
2010            no_catalog: true,
2011            config_dir: None,
2012            schema_cache_ttl: None,
2013        };
2014        let mut first_statuses = Vec::new();
2015        let result = run_with(&c, Some(mock(&[])), |cf| {
2016            first_statuses.push(cf.validation_cache_status);
2017        })
2018        .await?;
2019        assert!(!result.has_errors());
2020        assert!(result.files_checked() > 0);
2021
2022        // Verify the first run recorded a validation cache miss
2023        assert!(
2024            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2025            "expected at least one validation cache miss on first run"
2026        );
2027
2028        // Second run: same file, same schema — should hit validation cache
2029        let mut second_statuses = Vec::new();
2030        let result = run_with(&c, Some(mock(&[])), |cf| {
2031            second_statuses.push(cf.validation_cache_status);
2032        })
2033        .await?;
2034        assert!(!result.has_errors());
2035
2036        // Verify the second run got a validation cache hit
2037        assert!(
2038            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2039            "expected at least one validation cache hit on second run"
2040        );
2041        Ok(())
2042    }
2043
2044    // --- clean_error_message ---
2045
2046    #[test]
2047    fn clean_strips_anyof_value() {
2048        let msg =
2049            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2050        assert_eq!(
2051            clean_error_message(msg.to_string()),
2052            "not valid under any of the schemas listed in the 'anyOf' keyword"
2053        );
2054    }
2055
2056    #[test]
2057    fn clean_strips_oneof_value() {
2058        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2059        assert_eq!(
2060            clean_error_message(msg.to_string()),
2061            "not valid under any of the schemas listed in the 'oneOf' keyword"
2062        );
2063    }
2064
2065    #[test]
2066    fn clean_strips_long_value() {
2067        let long_value = "x".repeat(5000);
2068        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2069        let msg = format!("{long_value}{suffix}");
2070        assert_eq!(
2071            clean_error_message(msg),
2072            "not valid under any of the schemas listed in the 'anyOf' keyword"
2073        );
2074    }
2075
2076    #[test]
2077    fn clean_preserves_type_error() {
2078        let msg = r#"12345 is not of types "null", "string""#;
2079        assert_eq!(clean_error_message(msg.to_string()), msg);
2080    }
2081
2082    #[test]
2083    fn clean_preserves_required_property() {
2084        let msg = "\"name\" is a required property";
2085        assert_eq!(clean_error_message(msg.to_string()), msg);
2086    }
2087
2088    #[tokio::test]
2089    async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2090        let tmp = tempfile::tempdir()?;
2091
2092        // Referenced schema with a "name" string definition
2093        std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2094
2095        // Main schema that uses a relative $ref
2096        let schema_path = tmp.path().join("schema.json");
2097        std::fs::write(
2098            &schema_path,
2099            r#"{
2100                "type": "object",
2101                "properties": {
2102                    "name": { "$ref": "./defs.json" }
2103                },
2104                "required": ["name"]
2105            }"#,
2106        )?;
2107
2108        // Valid data file pointing to the local schema
2109        let schema_uri = schema_path.to_string_lossy();
2110        std::fs::write(
2111            tmp.path().join("data.json"),
2112            format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2113        )?;
2114
2115        // Invalid data file (name should be a string per defs.json)
2116        std::fs::write(
2117            tmp.path().join("bad.json"),
2118            format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2119        )?;
2120
2121        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2122        let args = ValidateArgs {
2123            globs: vec![pattern],
2124            exclude: vec![],
2125            cache_dir: None,
2126            force_schema_fetch: true,
2127            force_validation: true,
2128            no_catalog: true,
2129            config_dir: None,
2130            schema_cache_ttl: None,
2131        };
2132        let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2133
2134        // The invalid file should produce an error (name is 42, not a string)
2135        assert!(result.has_errors());
2136        // Exactly one file should have errors (bad.json), the other (data.json) should pass
2137        assert_eq!(result.errors.len(), 1);
2138        Ok(())
2139    }
2140}