//! `lintel_validate/validate.rs` — file collection, parsing, schema
//! resolution, and JSON Schema validation pipeline.

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use lintel_diagnostics::reporter::{CheckResult, CheckedFile};
11use lintel_diagnostics::{DEFAULT_LABEL, LintelDiagnostic, find_instance_path_span, format_label};
12use lintel_schema_cache::{CacheStatus, SchemaCache};
13use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
14use schema_catalog::{CompiledCatalog, FileFormat};
15
16use crate::catalog;
17use crate::discover;
18use crate::parsers::{self, Parser};
19use crate::registry;
20
/// Conservative limit for concurrent file reads to avoid exhausting file
/// descriptors. 128 is well below the default soft limit on macOS (256) and
/// Linux (1024) while still providing good throughput.
///
/// Used by [`read_files`] to size its semaphore.
const FD_CONCURRENCY_LIMIT: usize = 128;
25
/// Composite retriever that dispatches `file://` URIs to local disk reads
/// and everything else to the HTTP-backed [`SchemaCache`].
struct LocalRetriever {
    /// Fallback cache used for all non-`file://` URIs.
    http: SchemaCache,
}
31
32#[async_trait::async_trait]
33impl jsonschema::AsyncRetrieve for LocalRetriever {
34    async fn retrieve(
35        &self,
36        uri: &jsonschema::Uri<String>,
37    ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
38        let s = uri.as_str();
39        if let Some(raw) = s.strip_prefix("file://") {
40            let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
41            let content = tokio::fs::read_to_string(path.as_ref()).await?;
42            Ok(serde_json::from_str(&content)?)
43        } else {
44            self.http.retrieve(uri).await
45        }
46    }
47}
48
/// Options controlling a validation run (mirrors the CLI flags).
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover)
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
74
75// ---------------------------------------------------------------------------
76// Internal types
77// ---------------------------------------------------------------------------
78
/// A file that has been parsed and matched to a schema URI.
struct ParsedFile {
    /// Display path used in diagnostics (for JSONL this is `file.jsonl:LINE`).
    path: String,
    /// Raw text of the file (or of a single JSONL line).
    content: String,
    /// Parsed JSON representation of the content.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
87
88// ---------------------------------------------------------------------------
89// Config loading
90// ---------------------------------------------------------------------------
91
92/// Locate `lintel.toml`, load the full config, and return the config directory.
93/// Returns `(config, config_dir, config_path)`.  When no config is found or
94/// cwd is unavailable the config is default and `config_path` is `None`.
95#[tracing::instrument(skip_all)]
96pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
97    let start_dir = match search_dir {
98        Some(d) => d.to_path_buf(),
99        None => match std::env::current_dir() {
100            Ok(d) => d,
101            Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
102        },
103    };
104
105    let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
106        return (lintel_config::Config::default(), start_dir, None);
107    };
108
109    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
110    let cfg = lintel_config::find_and_load(&start_dir)
111        .ok()
112        .flatten()
113        .unwrap_or_default();
114    (cfg, dir, Some(config_path))
115}
116
117// ---------------------------------------------------------------------------
118// File collection
119// ---------------------------------------------------------------------------
120
121/// Collect input files from globs/directories, applying exclude filters.
122///
123/// # Errors
124///
125/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
126#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
127pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
128    if globs.is_empty() {
129        return discover::discover_files(".", exclude);
130    }
131
132    let mut result = Vec::new();
133    for pattern in globs {
134        let path = Path::new(pattern);
135        if path.is_dir() {
136            result.extend(discover::discover_files(pattern, exclude)?);
137        } else {
138            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
139                let path = entry?;
140                if path.is_file() && !is_excluded(&path, exclude) {
141                    result.push(path);
142                }
143            }
144        }
145    }
146    Ok(result)
147}
148
149fn is_excluded(path: &Path, excludes: &[String]) -> bool {
150    let path_str = match path.to_str() {
151        Some(s) => s.strip_prefix("./").unwrap_or(s),
152        None => return false,
153    };
154    excludes
155        .iter()
156        .any(|pattern| glob_match::glob_match(pattern, path_str))
157}
158
159// ---------------------------------------------------------------------------
160// Phase 1: Parse files and resolve schema URIs
161// ---------------------------------------------------------------------------
162
163/// Try parsing content with each known format, returning the first success.
164///
165/// JSONC is tried first (superset of JSON, handles comments), then YAML and
166/// TOML which cover the most common config formats, followed by the rest.
167pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
168    use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
169    const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
170
171    for fmt in FORMATS {
172        let parser = parsers::parser_for(fmt);
173        if let Ok(val) = parser.parse(content, file_name) {
174            return Some((fmt, val));
175        }
176    }
177    None
178}
179
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// Successfully parsed and matched to a schema.
    Parsed {
        /// Fully resolved schema URI (after rewrites and path resolution).
        schema_uri: String,
        /// The parsed content and metadata for validation.
        parsed: ParsedFile,
    },
    /// A diagnostic produced while parsing or resolving the file.
    Error(LintelDiagnostic),
    /// File was skipped (no schema match, unparseable, or empty).
    Skip,
}
190
/// Resolve a relative local schema path against a base directory.
///
/// Remote URIs (`http://`/`https://`) and explicit `file://` URIs are returned
/// unchanged — `file://` URIs are already absolute and are handled specially
/// by the retriever, so joining them onto a base directory would corrupt them
/// (e.g. `/base/file:///abs/x`). For plain local paths, joins with the
/// provided base directory (file's parent for inline `$schema`, config dir
/// for config/catalog sources). Absolute local paths are unaffected by the
/// join: `Path::join` replaces the base when the argument is absolute.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    if schema_uri.starts_with("http://")
        || schema_uri.starts_with("https://")
        || schema_uri.starts_with("file://")
    {
        return schema_uri.to_string();
    }
    match base_dir {
        Some(dir) => dir.join(schema_uri).to_string_lossy().into_owned(),
        None => schema_uri.to_string(),
    }
}
206
/// Process a single file's already-read content: parse and resolve schema URI.
///
/// Returns a `Vec` because JSONL files expand to one result per non-empty line.
///
/// * `path` / `content` — the file on disk and its pre-read text.
/// * `config` / `config_dir` — loaded `lintel.toml` and the directory it lives
///   in (used to resolve config-relative schema paths).
/// * `compiled_catalogs` — catalogs consulted when no inline/config schema
///   mapping matches, in precedence order.
#[allow(clippy::too_many_arguments)]
fn process_one_file(
    path: &Path,
    content: String,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let path_str = path.display().to_string();
    // Bare file name for pattern matching; falls back to the full path when
    // the name is missing or not valid UTF-8.
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&path_str);

    let detected_format = parsers::detect_format(path);

    // JSONL files get special per-line handling.
    if detected_format == Some(FileFormat::Jsonl) {
        return process_jsonl_file(
            path,
            &path_str,
            file_name,
            &content,
            config,
            config_dir,
            compiled_catalogs,
        );
    }

    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
    if detected_format.is_none() {
        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
            || compiled_catalogs
                .iter()
                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
        if !has_match {
            return vec![FileResult::Skip];
        }
    }

    // Parse the file content. Known extensions parse with their own format
    // (a parse failure is a reportable error); unknown extensions fall back
    // to trying every format (a failure just skips the file).
    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
        let parser = parsers::parser_for(fmt);
        match parser.parse(&content, &path_str) {
            Ok(val) => (parser, val),
            Err(parse_err) => return vec![FileResult::Error(parse_err)],
        }
    } else {
        match try_parse_all(&content, &path_str) {
            Some((fmt, val)) => (parsers::parser_for(fmt), val),
            None => return vec![FileResult::Skip],
        }
    };

    // Skip markdown files with no frontmatter
    if instance.is_null() {
        return vec![FileResult::Skip];
    }

    // Schema resolution priority:
    // 1. Inline $schema / YAML modeline (always wins)
    // 2. Custom schema mappings from lintel.toml [schemas]
    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
    //
    // Track whether the URI came from inline $schema (resolve relative to file)
    // or from config/catalog (resolve relative to config dir).
    let inline_uri = parser.extract_schema_uri(&content, &instance);
    let from_inline = inline_uri.is_some();
    let schema_uri = inline_uri
        .or_else(|| {
            config
                .find_schema_mapping(&path_str, file_name)
                .map(str::to_string)
        })
        .or_else(|| {
            compiled_catalogs
                .iter()
                .find_map(|cat| cat.find_schema(&path_str, file_name))
                .map(str::to_string)
        });

    let Some(schema_uri) = schema_uri else {
        return vec![FileResult::Skip];
    };

    // Keep original URI for override matching (before rewrites)
    let original_schema_uri = schema_uri.clone();

    // Apply rewrite rules, then resolve // paths relative to lintel.toml
    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

    // Resolve relative local paths:
    // - Inline $schema: relative to the file's parent directory
    // - Config/catalog: relative to the config directory (where lintel.toml lives)
    let schema_uri = resolve_local_schema_path(
        &schema_uri,
        if from_inline {
            path.parent()
        } else {
            Some(config_dir)
        },
    );

    vec![FileResult::Parsed {
        schema_uri,
        parsed: ParsedFile {
            path: path_str,
            content,
            instance,
            original_schema_uri,
        },
    }]
}
324
/// Process a JSONL file: parse each line independently and resolve schemas.
///
/// Each non-empty line becomes its own [`FileResult::Parsed`]. Schema resolution
/// priority per line: inline `$schema` on the line > config mapping > catalog.
///
/// Also checks schema consistency across lines — mismatches are emitted as
/// [`FileResult::Error`] so they flow through the normal Reporter pipeline.
///
/// * `path_str` / `file_name` — precomputed display path and bare file name
///   (avoids recomputing them per line).
#[allow(clippy::too_many_arguments)]
fn process_jsonl_file(
    path: &Path,
    path_str: &str,
    file_name: &str,
    content: &str,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
        Ok(lines) => lines,
        Err(parse_err) => return vec![FileResult::Error(parse_err)],
    };

    if lines.is_empty() {
        return vec![FileResult::Skip];
    }

    let mut results = Vec::with_capacity(lines.len());

    // Check schema consistency before consuming lines.
    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
        for m in mismatches {
            results.push(FileResult::Error(LintelDiagnostic::SchemaMismatch {
                path: path_str.to_string(),
                line_number: m.line_number,
                message: format!("expected consistent $schema but found {}", m.schema_uri),
            }));
        }
    }

    for line in lines {
        // Schema resolution: inline $schema on line > config > catalog
        // Track source to resolve relative paths correctly.
        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
        let from_inline = inline_uri.is_some();
        let schema_uri = inline_uri
            .or_else(|| {
                config
                    .find_schema_mapping(path_str, file_name)
                    .map(str::to_string)
            })
            .or_else(|| {
                compiled_catalogs
                    .iter()
                    .find_map(|cat| cat.find_schema(path_str, file_name))
                    .map(str::to_string)
            });

        // Lines with no resolvable schema are silently skipped.
        let Some(schema_uri) = schema_uri else {
            continue;
        };

        // Keep original URI for override matching (before rewrites).
        let original_schema_uri = schema_uri.clone();

        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

        // Inline $schema: relative to file's parent. Config/catalog: relative to config dir.
        let schema_uri = resolve_local_schema_path(
            &schema_uri,
            if from_inline {
                path.parent()
            } else {
                Some(config_dir)
            },
        );

        // Each line is reported as "file.jsonl:LINE" so diagnostics point
        // at the exact line.
        let line_path = format!("{path_str}:{}", line.line_number);

        results.push(FileResult::Parsed {
            schema_uri,
            parsed: ParsedFile {
                path: line_path,
                content: line.raw,
                instance: line.value,
                original_schema_uri,
            },
        });
    }

    if results.is_empty() {
        vec![FileResult::Skip]
    } else {
        results
    }
}
420
421/// Read files concurrently with tokio, using a semaphore to avoid exhausting
422/// file descriptors. I/O errors are pushed as `LintelDiagnostic::Io`.
423///
424/// # Panics
425///
426/// Panics if the internal semaphore is unexpectedly closed (should not happen).
427#[tracing::instrument(skip_all, fields(file_count = files.len()))]
428pub async fn read_files(
429    files: &[PathBuf],
430    errors: &mut Vec<LintelDiagnostic>,
431) -> Vec<(PathBuf, String)> {
432    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
433    let mut read_set = tokio::task::JoinSet::new();
434    for path in files {
435        let path = path.clone();
436        let sem = semaphore.clone();
437        read_set.spawn(async move {
438            let _permit = sem.acquire().await.expect("semaphore closed");
439            let result = tokio::fs::read_to_string(&path).await;
440            (path, result)
441        });
442    }
443
444    let mut file_contents = Vec::with_capacity(files.len());
445    while let Some(result) = read_set.join_next().await {
446        match result {
447            Ok((path, Ok(content))) => file_contents.push((path, content)),
448            Ok((path, Err(e))) => {
449                errors.push(LintelDiagnostic::Io {
450                    path: path.display().to_string(),
451                    message: format!("failed to read: {e}"),
452                });
453            }
454            Err(e) => tracing::warn!("file read task panicked: {e}"),
455        }
456    }
457
458    file_contents
459}
460
461/// Parse pre-read file contents, extract schema URIs, apply rewrites, and
462/// group by resolved schema URI.
463#[tracing::instrument(skip_all, fields(file_count = file_contents.len()))]
464#[allow(clippy::too_many_arguments)]
465fn parse_and_group_contents(
466    file_contents: Vec<(PathBuf, String)>,
467    config: &lintel_config::Config,
468    config_dir: &Path,
469    compiled_catalogs: &[CompiledCatalog],
470    errors: &mut Vec<LintelDiagnostic>,
471) -> BTreeMap<String, Vec<ParsedFile>> {
472    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
473    for (path, content) in file_contents {
474        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
475        for result in results {
476            match result {
477                FileResult::Parsed { schema_uri, parsed } => {
478                    schema_groups.entry(schema_uri).or_default().push(parsed);
479                }
480                FileResult::Error(e) => errors.push(e),
481                FileResult::Skip => {}
482            }
483        }
484    }
485
486    schema_groups
487}
488
489// ---------------------------------------------------------------------------
490// Phase 2: Schema fetching, compilation, and instance validation
491// ---------------------------------------------------------------------------
492
/// Fetch a schema by URI, returning its parsed JSON and cache status.
///
/// For remote URIs, checks the prefetched map first; for local URIs, reads
/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
///
/// On any failure the same error is reported for every file in `group` (via
/// [`report_group_error`]) and `None` is returned. The `CacheStatus` in the
/// return value is `None` for local schemas (they never touch the HTTP cache).
#[allow(clippy::too_many_arguments)]
async fn fetch_schema_from_prefetched(
    schema_uri: &str,
    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
    local_cache: &mut HashMap<String, Value>,
    group: &[ParsedFile],
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Option<(Value, Option<CacheStatus>)> {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");

    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
        // Remote schemas must already be in the prefetch map; a missing entry
        // is itself an error rather than a trigger for a late fetch.
        match prefetched.get(schema_uri) {
            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
            None => Err(format!("schema not prefetched: {schema_uri}")),
        }
    } else if let Some(cached) = local_cache.get(schema_uri) {
        Ok((cached.clone(), None))
    } else {
        // First read of this local schema: load, parse, and memoize on success.
        tokio::fs::read_to_string(schema_uri)
            .await
            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
            .and_then(|content| {
                serde_json::from_str::<Value>(&content)
                    .map(|v| {
                        local_cache.insert(schema_uri.to_string(), v.clone());
                        (v, None)
                    })
                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
            })
    };

    match result {
        Ok(value) => Some(value),
        Err(message) => {
            // Fan the single fetch/parse failure out to every file that
            // depends on this schema, so each gets its own diagnostic.
            report_group_error(
                |path| LintelDiagnostic::SchemaFetch {
                    path: path.to_string(),
                    message: message.clone(),
                },
                schema_uri,
                None,
                group,
                errors,
                checked,
                on_check,
            );
            None
        }
    }
}
550
551/// Report the same error for every file in a schema group.
552#[allow(clippy::too_many_arguments)]
553fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
554    make_error: impl Fn(&str) -> LintelDiagnostic,
555    schema_uri: &str,
556    cache_status: Option<CacheStatus>,
557    group: &[P],
558    errors: &mut Vec<LintelDiagnostic>,
559    checked: &mut Vec<CheckedFile>,
560    on_check: &mut impl FnMut(&CheckedFile),
561) {
562    for item in group {
563        let pf = item.borrow();
564        let cf = CheckedFile {
565            path: pf.path.clone(),
566            schema: schema_uri.to_string(),
567            cache_status,
568            validation_cache_status: None,
569        };
570        on_check(&cf);
571        checked.push(cf);
572        errors.push(make_error(&pf.path));
573    }
574}
575
576/// Mark every file in a group as checked (no errors).
577#[allow(clippy::too_many_arguments)]
578fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
579    schema_uri: &str,
580    cache_status: Option<CacheStatus>,
581    validation_cache_status: Option<ValidationCacheStatus>,
582    group: &[P],
583    checked: &mut Vec<CheckedFile>,
584    on_check: &mut impl FnMut(&CheckedFile),
585) {
586    for item in group {
587        let pf = item.borrow();
588        let cf = CheckedFile {
589            path: pf.path.clone(),
590            schema: schema_uri.to_string(),
591            cache_status,
592            validation_cache_status,
593        };
594        on_check(&cf);
595        checked.push(cf);
596    }
597}
598
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// All other messages are returned unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    match msg.find(MARKER) {
        // Drop the dumped value plus the " is " connective so only the
        // human-readable tail ("not valid under ...") remains. MARKER is
        // pure ASCII, so the byte-offset slice is always on a char boundary.
        Some(pos) => msg[pos + " is ".len()..].to_string(),
        None => msg,
    }
}
615
616/// Convert [`ValidationError`]s into [`LintelDiagnostic::Validation`] diagnostics.
617fn push_validation_errors(
618    pf: &ParsedFile,
619    schema_url: &str,
620    validation_errors: &[ValidationError],
621    errors: &mut Vec<LintelDiagnostic>,
622) {
623    for ve in validation_errors {
624        let span = find_instance_path_span(&pf.content, &ve.instance_path);
625        let instance_path = if ve.instance_path.is_empty() {
626            DEFAULT_LABEL.to_string()
627        } else {
628            ve.instance_path.clone()
629        };
630        let label = format_label(&instance_path, &ve.schema_path);
631        let source_span: miette::SourceSpan = span.into();
632        errors.push(LintelDiagnostic::Validation {
633            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
634            span: source_span,
635            schema_span: source_span,
636            path: pf.path.clone(),
637            instance_path,
638            label,
639            message: ve.message.clone(),
640            schema_url: schema_url.to_string(),
641            schema_path: ve.schema_path.clone(),
642        });
643    }
644}
645
/// Validate all files in a group against an already-compiled validator and store
/// results in the validation cache.
///
/// Every file is validated, its (possibly empty) error list is written to the
/// validation cache keyed by content + schema hash + format flag, and the file
/// is reported as checked with `ValidationCacheStatus::Miss` (this path is the
/// cache-miss path; cached hits appear to be handled by the caller — confirm).
#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
#[allow(clippy::too_many_arguments)]
async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
    validator: &jsonschema::Validator,
    schema_uri: &str,
    schema_hash: &str,
    validate_formats: bool,
    cache_status: Option<CacheStatus>,
    group: &[P],
    vcache: &lintel_validation_cache::ValidationCache,
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) {
    for item in group {
        let pf = item.borrow();
        // Collect every validation failure (not just the first) so the user
        // sees all problems in one run.
        let file_errors: Vec<ValidationError> = validator
            .iter_errors(&pf.instance)
            .map(|error| ValidationError {
                instance_path: error.instance_path().to_string(),
                message: clean_error_message(error.to_string()),
                schema_path: error.schema_path().to_string(),
            })
            .collect();

        // Store even an empty error list so future identical runs can skip
        // re-validation entirely.
        vcache
            .store(
                &lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash,
                    validate_formats,
                },
                &file_errors,
            )
            .await;
        push_validation_errors(pf, schema_uri, &file_errors, errors);

        let cf = CheckedFile {
            path: pf.path.clone(),
            schema: schema_uri.to_string(),
            cache_status,
            validation_cache_status: Some(ValidationCacheStatus::Miss),
        };
        on_check(&cf);
        checked.push(cf);
    }
}
695
696// ---------------------------------------------------------------------------
697// Public API
698// ---------------------------------------------------------------------------
699
/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
///
/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
///
/// Returned catalogs are sorted by precedence: custom registries (in
/// `lintel.toml` order) first, then the Lintel default catalog, then
/// SchemaStore. When `no_catalog` is true, returns an empty list.
pub async fn fetch_compiled_catalogs(
    retriever: &SchemaCache,
    config: &lintel_config::Config,
    no_catalog: bool,
) -> Vec<CompiledCatalog> {
    let mut compiled_catalogs = Vec::new();

    if !no_catalog {
        let catalog_span = tracing::info_span!("fetch_catalogs").entered();

        // Catalogs are fetched concurrently but sorted by priority so that
        // the Lintel catalog wins over custom registries, which win over
        // SchemaStore.  The `order` field encodes this precedence.
        #[allow(clippy::items_after_statements)]
        type CatalogResult = (
            usize, // priority (lower = higher precedence)
            String,
            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
        );
        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();

        // Custom registries from lintel.toml (highest precedence among catalogs)
        for (i, registry_url) in config.registries.iter().enumerate() {
            let r = retriever.clone();
            let url = registry_url.clone();
            let label = format!("registry {url}");
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, &url)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (i, label, result)
            });
        }

        // Lintel catalog (skipped when the config opts out of it)
        let lintel_order = config.registries.len();
        if !config.no_default_catalog {
            let r = retriever.clone();
            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (lintel_order, label, result)
            });
        }

        // SchemaStore catalog (lowest precedence)
        let schemastore_order = config.registries.len() + 1;
        let r = retriever.clone();
        catalog_tasks.spawn(async move {
            let result = catalog::fetch_catalog(&r)
                .await
                .map(|cat| CompiledCatalog::compile(&cat));
            (schemastore_order, "SchemaStore catalog".to_string(), result)
        });

        // Gather results as they complete; failures are warnings, not errors,
        // so one bad registry does not break validation.
        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
        while let Some(result) = catalog_tasks.join_next().await {
            match result {
                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
            }
        }
        // Restore precedence order lost by concurrent completion.
        results.sort_by_key(|(order, _)| *order);
        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));

        drop(catalog_span);
    }

    compiled_catalogs
}
776
/// Run validation with default caching and no progress callback.
///
/// # Errors
///
/// Returns an error if file collection or schema validation encounters an I/O error.
pub async fn run(args: &ValidateArgs) -> Result<CheckResult> {
    run_with(args, None, |_| {}).await
}
783
784/// Like [`run`], but calls `on_check` each time a file is checked, allowing
785/// callers to stream progress (e.g. verbose output) as files are processed.
786///
787/// # Errors
788///
789/// Returns an error if file collection or schema validation encounters an I/O error.
790#[tracing::instrument(skip_all, name = "validate")]
791pub async fn run_with(
792    args: &ValidateArgs,
793    cache: Option<SchemaCache>,
794    mut on_check: impl FnMut(&CheckedFile),
795) -> Result<CheckResult> {
796    let retriever = build_retriever(args, cache);
797    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
798    let files = collect_files(&args.globs, &args.exclude)?;
799    tracing::info!(file_count = files.len(), "collected files");
800
801    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
802
803    let mut errors: Vec<LintelDiagnostic> = Vec::new();
804    let file_contents = read_files(&files, &mut errors).await;
805
806    run_with_contents_inner(
807        file_contents,
808        args,
809        retriever,
810        config,
811        &config_dir,
812        compiled_catalogs,
813        errors,
814        &mut on_check,
815    )
816    .await
817}
818
819/// Like [`run_with`], but accepts pre-read file contents instead of reading
820/// from disk. Use this when the caller has already read files (e.g. to share
821/// reads between format checking and validation).
822///
823/// # Errors
824///
825/// Returns an error if schema validation encounters an I/O or network error.
826pub async fn run_with_contents(
827    args: &ValidateArgs,
828    file_contents: Vec<(PathBuf, String)>,
829    cache: Option<SchemaCache>,
830    mut on_check: impl FnMut(&CheckedFile),
831) -> Result<CheckResult> {
832    let retriever = build_retriever(args, cache);
833    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
834    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
835    let errors: Vec<LintelDiagnostic> = Vec::new();
836
837    run_with_contents_inner(
838        file_contents,
839        args,
840        retriever,
841        config,
842        &config_dir,
843        compiled_catalogs,
844        errors,
845        &mut on_check,
846    )
847    .await
848}
849
850fn build_retriever(args: &ValidateArgs, cache: Option<SchemaCache>) -> SchemaCache {
851    if let Some(c) = cache {
852        return c;
853    }
854    let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
855    if let Some(dir) = &args.cache_dir {
856        let path = PathBuf::from(dir);
857        let _ = fs::create_dir_all(&path);
858        builder = builder.cache_dir(path);
859    }
860    if let Some(ttl) = args.schema_cache_ttl {
861        builder = builder.ttl(ttl);
862    }
863    builder.build()
864}
865
/// Core validation pipeline shared by [`run_with`] and [`run_with_contents`]:
/// group files by schema, prefetch remote schemas in parallel, then compile
/// each schema once and validate its whole group, consulting the validation
/// cache before compiling.
///
/// `errors` may already contain diagnostics (e.g. file-read failures) and is
/// extended in place; the final list is sorted for deterministic output.
/// `on_check` is invoked once per checked file as results become available.
#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
async fn run_with_contents_inner(
    file_contents: Vec<(PathBuf, String)>,
    args: &ValidateArgs,
    retriever: SchemaCache,
    config: lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: Vec<CompiledCatalog>,
    mut errors: Vec<LintelDiagnostic>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let mut checked: Vec<CheckedFile> = Vec::new();

    // Phase 1: Parse files and resolve schema URIs
    let schema_groups = parse_and_group_contents(
        file_contents,
        &config,
        config_dir,
        &compiled_catalogs,
        &mut errors,
    );
    tracing::info!(
        schema_count = schema_groups.len(),
        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
        "grouped files by schema"
    );

    // Create validation cache
    let vcache = lintel_validation_cache::ValidationCache::new(
        lintel_validation_cache::ensure_cache_dir(),
        args.force_validation,
    );

    // Prefetch all remote schemas in parallel
    let remote_uris: Vec<&String> = schema_groups
        .keys()
        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
        .collect();

    let prefetched = {
        let _prefetch_span =
            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();

        let mut schema_tasks = tokio::task::JoinSet::new();
        for uri in remote_uris {
            let r = retriever.clone();
            let u = uri.clone();
            schema_tasks.spawn(async move {
                let result = r.fetch(&u).await;
                (u, result)
            });
        }

        // Maps schema URI -> fetch outcome; fetch errors are stringified so
        // they can be reported later without holding the original error type.
        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
        while let Some(result) = schema_tasks.join_next().await {
            match result {
                Ok((uri, fetch_result)) => {
                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
                }
                // The spawned task itself failed (e.g. panicked) — warn and move on.
                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
            }
        }

        prefetched
    };

    // Phase 2: Compile each schema once and validate all matching files
    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
    // Coarse per-phase timers, reported in the "phase2 breakdown" event below.
    let mut fetch_time = core::time::Duration::ZERO;
    let mut hash_time = core::time::Duration::ZERO;
    let mut vcache_time = core::time::Duration::ZERO;
    let mut compile_time = core::time::Duration::ZERO;
    let mut validate_time = core::time::Duration::ZERO;

    for (schema_uri, group) in &schema_groups {
        let _group_span = tracing::debug_span!(
            "schema_group",
            schema = schema_uri.as_str(),
            files = group.len(),
        )
        .entered();

        // If ANY file in the group matches a `validate_formats = false` override,
        // disable format validation for the whole group (they share one compiled validator).
        let validate_formats = group.iter().all(|pf| {
            config
                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
        });

        // Remote schemas were prefetched in parallel above; local schemas are
        // read from disk here (with in-memory caching).
        let t = std::time::Instant::now();
        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
            schema_uri,
            &prefetched,
            &mut local_schema_cache,
            group,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await
        else {
            // Schema could not be loaded; the helper received `errors`/`checked`
            // and `on_check` so the outcome for this group is already recorded.
            fetch_time += t.elapsed();
            continue;
        };
        fetch_time += t.elapsed();

        // Pre-compute schema hash once for the entire group.
        let t = std::time::Instant::now();
        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
        hash_time += t.elapsed();

        // Split the group into validation cache hits and misses.
        let mut cache_misses: Vec<&ParsedFile> = Vec::new();

        let t = std::time::Instant::now();
        for pf in group {
            let (cached, vcache_status) = vcache
                .lookup(&lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash: &schema_hash,
                    validate_formats,
                })
                .await;

            // Cache hit: replay the stored diagnostics instead of re-validating.
            if let Some(cached_errors) = cached {
                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
                let cf = CheckedFile {
                    path: pf.path.clone(),
                    schema: schema_uri.clone(),
                    cache_status,
                    validation_cache_status: Some(vcache_status),
                };
                on_check(&cf);
                checked.push(cf);
            } else {
                cache_misses.push(pf);
            }
        }
        vcache_time += t.elapsed();

        tracing::debug!(
            cache_hits = group.len() - cache_misses.len(),
            cache_misses = cache_misses.len(),
            "validation cache"
        );

        // If all files hit the validation cache, skip schema compilation entirely.
        if cache_misses.is_empty() {
            continue;
        }

        // Compile the schema for cache misses.
        let t = std::time::Instant::now();
        let validator = {
            // Set base URI so relative $ref values (e.g. "./rule.json") resolve
            // correctly. Remote schemas use the HTTP URI directly; local schemas
            // get a file:// URI derived from the canonical absolute path.
            let is_remote_schema =
                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
            let local_retriever = LocalRetriever {
                http: retriever.clone(),
            };
            let opts = jsonschema::async_options()
                .with_retriever(local_retriever)
                .should_validate_formats(validate_formats);
            let base_uri = if is_remote_schema {
                // Strip fragment (e.g. "#") — base URIs must not contain fragments.
                let uri = match schema_uri.find('#') {
                    Some(pos) => schema_uri[..pos].to_string(),
                    None => schema_uri.clone(),
                };
                Some(uri)
            } else {
                std::fs::canonicalize(schema_uri)
                    .ok()
                    .map(|p| format!("file://{}", p.display()))
            };
            let opts = if let Some(uri) = base_uri {
                opts.with_base_uri(uri)
            } else {
                opts
            };
            match opts.build(&schema_value).await {
                Ok(v) => v,
                Err(e) => {
                    compile_time += t.elapsed();
                    // When format validation is disabled and the compilation error
                    // is a uri-reference issue (e.g. Rust-style $ref paths in
                    // vector.json), skip validation silently.
                    if !validate_formats && e.to_string().contains("uri-reference") {
                        mark_group_checked(
                            schema_uri,
                            cache_status,
                            Some(ValidationCacheStatus::Miss),
                            &cache_misses,
                            &mut checked,
                            on_check,
                        );
                        continue;
                    }
                    // Otherwise report the compile failure against every file
                    // that was waiting on this schema.
                    let msg = format!("failed to compile schema: {e}");
                    report_group_error(
                        |path| LintelDiagnostic::SchemaCompile {
                            path: path.to_string(),
                            message: msg.clone(),
                        },
                        schema_uri,
                        cache_status,
                        &cache_misses,
                        &mut errors,
                        &mut checked,
                        on_check,
                    );
                    continue;
                }
            }
        };
        compile_time += t.elapsed();

        let t = std::time::Instant::now();
        validate_group(
            &validator,
            schema_uri,
            &schema_hash,
            validate_formats,
            cache_status,
            &cache_misses,
            &vcache,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await;
        validate_time += t.elapsed();
    }

    #[allow(clippy::cast_possible_truncation)]
    {
        tracing::info!(
            fetch_ms = fetch_time.as_millis() as u64,
            hash_ms = hash_time.as_millis() as u64,
            vcache_ms = vcache_time.as_millis() as u64,
            compile_ms = compile_time.as_millis() as u64,
            validate_ms = validate_time.as_millis() as u64,
            "phase2 breakdown"
        );
    }

    // Sort errors for deterministic output (by path, then by span offset)
    errors.sort_by(|a, b| {
        a.path()
            .cmp(b.path())
            .then_with(|| a.offset().cmp(&b.offset()))
    });

    Ok(CheckResult { errors, checked })
}
1125
1126#[cfg(test)]
1127mod tests {
1128    use super::*;
1129    use lintel_schema_cache::SchemaCache;
1130    use std::path::Path;
1131
1132    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1133        let cache = SchemaCache::memory();
1134        for (uri, body) in entries {
1135            cache.insert(
1136                uri,
1137                serde_json::from_str(body).expect("test mock: invalid JSON"),
1138            );
1139        }
1140        cache
1141    }
1142
1143    fn testdata() -> PathBuf {
1144        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1145    }
1146
1147    /// Build glob patterns that scan one or more testdata directories for all supported file types.
1148    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1149        dirs.iter()
1150            .flat_map(|dir| {
1151                let base = testdata().join(dir);
1152                vec![
1153                    base.join("*.json").to_string_lossy().to_string(),
1154                    base.join("*.yaml").to_string_lossy().to_string(),
1155                    base.join("*.yml").to_string_lossy().to_string(),
1156                    base.join("*.json5").to_string_lossy().to_string(),
1157                    base.join("*.jsonc").to_string_lossy().to_string(),
1158                    base.join("*.toml").to_string_lossy().to_string(),
1159                ]
1160            })
1161            .collect()
1162    }
1163
1164    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1165        ValidateArgs {
1166            globs: scenario_globs(dirs),
1167            exclude: vec![],
1168            cache_dir: None,
1169            force_schema_fetch: true,
1170            force_validation: true,
1171            no_catalog: true,
1172            config_dir: None,
1173            schema_cache_ttl: None,
1174        }
1175    }
1176
1177    const SCHEMA: &str =
1178        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1179
1180    fn schema_mock() -> SchemaCache {
1181        mock(&[("https://example.com/schema.json", SCHEMA)])
1182    }
1183
1184    // --- Directory scanning tests ---
1185
1186    #[tokio::test]
1187    async fn no_matching_files() -> anyhow::Result<()> {
1188        let tmp = tempfile::tempdir()?;
1189        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1190        let c = ValidateArgs {
1191            globs: vec![pattern],
1192            exclude: vec![],
1193            cache_dir: None,
1194            force_schema_fetch: true,
1195            force_validation: true,
1196            no_catalog: true,
1197            config_dir: None,
1198            schema_cache_ttl: None,
1199        };
1200        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1201        assert!(!result.has_errors());
1202        Ok(())
1203    }
1204
1205    #[tokio::test]
1206    async fn dir_all_valid() -> anyhow::Result<()> {
1207        let c = args_for_dirs(&["positive_tests"]);
1208        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1209        assert!(!result.has_errors());
1210        Ok(())
1211    }
1212
1213    #[tokio::test]
1214    async fn dir_all_invalid() -> anyhow::Result<()> {
1215        let c = args_for_dirs(&["negative_tests"]);
1216        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1217        assert!(result.has_errors());
1218        Ok(())
1219    }
1220
1221    #[tokio::test]
1222    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1223        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1224        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1225        assert!(result.has_errors());
1226        Ok(())
1227    }
1228
1229    #[tokio::test]
1230    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1231        let c = args_for_dirs(&["no_schema"]);
1232        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1233        assert!(!result.has_errors());
1234        Ok(())
1235    }
1236
1237    #[tokio::test]
1238    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1239        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1240        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1241        assert!(!result.has_errors());
1242        Ok(())
1243    }
1244
1245    // --- Directory as positional arg ---
1246
1247    #[tokio::test]
1248    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1249        let dir = testdata().join("positive_tests");
1250        let c = ValidateArgs {
1251            globs: vec![dir.to_string_lossy().to_string()],
1252            exclude: vec![],
1253            cache_dir: None,
1254            force_schema_fetch: true,
1255            force_validation: true,
1256            no_catalog: true,
1257            config_dir: None,
1258            schema_cache_ttl: None,
1259        };
1260        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1261        assert!(!result.has_errors());
1262        assert!(result.files_checked() > 0);
1263        Ok(())
1264    }
1265
1266    #[tokio::test]
1267    async fn multiple_directory_args() -> anyhow::Result<()> {
1268        let pos_dir = testdata().join("positive_tests");
1269        let no_schema_dir = testdata().join("no_schema");
1270        let c = ValidateArgs {
1271            globs: vec![
1272                pos_dir.to_string_lossy().to_string(),
1273                no_schema_dir.to_string_lossy().to_string(),
1274            ],
1275            exclude: vec![],
1276            cache_dir: None,
1277            force_schema_fetch: true,
1278            force_validation: true,
1279            no_catalog: true,
1280            config_dir: None,
1281            schema_cache_ttl: None,
1282        };
1283        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1284        assert!(!result.has_errors());
1285        Ok(())
1286    }
1287
1288    #[tokio::test]
1289    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1290        let dir = testdata().join("positive_tests");
1291        let glob_pattern = testdata()
1292            .join("no_schema")
1293            .join("*.json")
1294            .to_string_lossy()
1295            .to_string();
1296        let c = ValidateArgs {
1297            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1298            exclude: vec![],
1299            cache_dir: None,
1300            force_schema_fetch: true,
1301            force_validation: true,
1302            no_catalog: true,
1303            config_dir: None,
1304            schema_cache_ttl: None,
1305        };
1306        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1307        assert!(!result.has_errors());
1308        Ok(())
1309    }
1310
1311    #[tokio::test]
1312    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1313        let base = testdata().join("malformed");
1314        let c = ValidateArgs {
1315            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1316            exclude: vec![],
1317            cache_dir: None,
1318            force_schema_fetch: true,
1319            force_validation: true,
1320            no_catalog: true,
1321            config_dir: None,
1322            schema_cache_ttl: None,
1323        };
1324        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1325        assert!(result.has_errors());
1326        Ok(())
1327    }
1328
1329    #[tokio::test]
1330    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1331        let base = testdata().join("malformed");
1332        let c = ValidateArgs {
1333            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1334            exclude: vec![],
1335            cache_dir: None,
1336            force_schema_fetch: true,
1337            force_validation: true,
1338            no_catalog: true,
1339            config_dir: None,
1340            schema_cache_ttl: None,
1341        };
1342        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1343        assert!(result.has_errors());
1344        Ok(())
1345    }
1346
1347    // --- Exclude filter ---
1348
1349    #[tokio::test]
1350    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1351        let base = testdata().join("negative_tests");
1352        let c = ValidateArgs {
1353            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1354            exclude: vec![
1355                base.join("missing_name.json").to_string_lossy().to_string(),
1356                base.join("missing_name.toml").to_string_lossy().to_string(),
1357                base.join("missing_name.yaml").to_string_lossy().to_string(),
1358            ],
1359            cache_dir: None,
1360            force_schema_fetch: true,
1361            force_validation: true,
1362            no_catalog: true,
1363            config_dir: None,
1364            schema_cache_ttl: None,
1365        };
1366        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1367        assert!(!result.has_errors());
1368        Ok(())
1369    }
1370
1371    // --- Cache options ---
1372
1373    #[tokio::test]
1374    async fn custom_cache_dir() -> anyhow::Result<()> {
1375        let c = ValidateArgs {
1376            globs: scenario_globs(&["positive_tests"]),
1377            exclude: vec![],
1378            cache_dir: None,
1379            force_schema_fetch: true,
1380            force_validation: true,
1381            no_catalog: true,
1382            config_dir: None,
1383            schema_cache_ttl: None,
1384        };
1385        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1386        assert!(!result.has_errors());
1387        Ok(())
1388    }
1389
1390    // --- Local schema ---
1391
1392    #[tokio::test]
1393    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1394        let tmp = tempfile::tempdir()?;
1395        let schema_path = tmp.path().join("schema.json");
1396        fs::write(&schema_path, SCHEMA)?;
1397
1398        let f = tmp.path().join("valid.json");
1399        fs::write(
1400            &f,
1401            format!(
1402                r#"{{"$schema":"{}","name":"hello"}}"#,
1403                schema_path.to_string_lossy()
1404            ),
1405        )?;
1406
1407        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1408        let c = ValidateArgs {
1409            globs: vec![pattern],
1410            exclude: vec![],
1411            cache_dir: None,
1412            force_schema_fetch: true,
1413            force_validation: true,
1414            no_catalog: true,
1415            config_dir: None,
1416            schema_cache_ttl: None,
1417        };
1418        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1419        assert!(!result.has_errors());
1420        Ok(())
1421    }
1422
1423    #[tokio::test]
1424    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1425        let tmp = tempfile::tempdir()?;
1426        let schema_path = tmp.path().join("schema.json");
1427        fs::write(&schema_path, SCHEMA)?;
1428
1429        let f = tmp.path().join("valid.yaml");
1430        fs::write(
1431            &f,
1432            format!(
1433                "# yaml-language-server: $schema={}\nname: hello\n",
1434                schema_path.to_string_lossy()
1435            ),
1436        )?;
1437
1438        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1439        let c = ValidateArgs {
1440            globs: vec![pattern],
1441            exclude: vec![],
1442            cache_dir: None,
1443            force_schema_fetch: true,
1444            force_validation: true,
1445            no_catalog: true,
1446            config_dir: None,
1447            schema_cache_ttl: None,
1448        };
1449        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1450        assert!(!result.has_errors());
1451        Ok(())
1452    }
1453
1454    #[tokio::test]
1455    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1456        let tmp = tempfile::tempdir()?;
1457        let f = tmp.path().join("ref.json");
1458        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1459
1460        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1461        let c = ValidateArgs {
1462            globs: vec![pattern],
1463            exclude: vec![],
1464            cache_dir: None,
1465            force_schema_fetch: true,
1466            force_validation: true,
1467            no_catalog: true,
1468            config_dir: None,
1469            schema_cache_ttl: None,
1470        };
1471        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1472        assert!(result.has_errors());
1473        Ok(())
1474    }
1475
1476    // --- JSON5 / JSONC tests ---
1477
1478    #[tokio::test]
1479    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1480        let tmp = tempfile::tempdir()?;
1481        let schema_path = tmp.path().join("schema.json");
1482        fs::write(&schema_path, SCHEMA)?;
1483
1484        let f = tmp.path().join("config.json5");
1485        fs::write(
1486            &f,
1487            format!(
1488                r#"{{
1489  // JSON5 comment
1490  "$schema": "{}",
1491  name: "hello",
1492}}"#,
1493                schema_path.to_string_lossy()
1494            ),
1495        )?;
1496
1497        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1498        let c = ValidateArgs {
1499            globs: vec![pattern],
1500            exclude: vec![],
1501            cache_dir: None,
1502            force_schema_fetch: true,
1503            force_validation: true,
1504            no_catalog: true,
1505            config_dir: None,
1506            schema_cache_ttl: None,
1507        };
1508        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1509        assert!(!result.has_errors());
1510        Ok(())
1511    }
1512
1513    #[tokio::test]
1514    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1515        let tmp = tempfile::tempdir()?;
1516        let schema_path = tmp.path().join("schema.json");
1517        fs::write(&schema_path, SCHEMA)?;
1518
1519        let f = tmp.path().join("config.jsonc");
1520        fs::write(
1521            &f,
1522            format!(
1523                r#"{{
1524  /* JSONC comment */
1525  "$schema": "{}",
1526  "name": "hello"
1527}}"#,
1528                schema_path.to_string_lossy()
1529            ),
1530        )?;
1531
1532        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1533        let c = ValidateArgs {
1534            globs: vec![pattern],
1535            exclude: vec![],
1536            cache_dir: None,
1537            force_schema_fetch: true,
1538            force_validation: true,
1539            no_catalog: true,
1540            config_dir: None,
1541            schema_cache_ttl: None,
1542        };
1543        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1544        assert!(!result.has_errors());
1545        Ok(())
1546    }
1547
1548    // --- Catalog-based schema matching ---
1549
1550    const GH_WORKFLOW_SCHEMA: &str = r#"{
1551        "type": "object",
1552        "properties": {
1553            "name": { "type": "string" },
1554            "on": {},
1555            "jobs": { "type": "object" }
1556        },
1557        "required": ["on", "jobs"]
1558    }"#;
1559
1560    fn gh_catalog_json() -> String {
1561        r#"{"version":1,"schemas":[{
1562            "name": "GitHub Workflow",
1563            "description": "GitHub Actions workflow",
1564            "url": "https://www.schemastore.org/github-workflow.json",
1565            "fileMatch": [
1566                "**/.github/workflows/*.yml",
1567                "**/.github/workflows/*.yaml"
1568            ]
1569        }]}"#
1570            .to_string()
1571    }
1572
1573    #[tokio::test]
1574    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1575        let tmp = tempfile::tempdir()?;
1576        let cache_tmp = tempfile::tempdir()?;
1577        let wf_dir = tmp.path().join(".github/workflows");
1578        fs::create_dir_all(&wf_dir)?;
1579        fs::write(
1580            wf_dir.join("ci.yml"),
1581            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1582        )?;
1583
1584        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1585        let client = mock(&[
1586            (
1587                "https://www.schemastore.org/api/json/catalog.json",
1588                &gh_catalog_json(),
1589            ),
1590            (
1591                "https://www.schemastore.org/github-workflow.json",
1592                GH_WORKFLOW_SCHEMA,
1593            ),
1594        ]);
1595        let c = ValidateArgs {
1596            globs: vec![pattern],
1597            exclude: vec![],
1598            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1599            force_schema_fetch: true,
1600            force_validation: true,
1601            no_catalog: false,
1602            config_dir: None,
1603            schema_cache_ttl: None,
1604        };
1605        let result = run_with(&c, Some(client), |_| {}).await?;
1606        assert!(!result.has_errors());
1607        Ok(())
1608    }
1609
1610    #[tokio::test]
1611    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1612        let tmp = tempfile::tempdir()?;
1613        let cache_tmp = tempfile::tempdir()?;
1614        let wf_dir = tmp.path().join(".github/workflows");
1615        fs::create_dir_all(&wf_dir)?;
1616        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1617
1618        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1619        let client = mock(&[
1620            (
1621                "https://www.schemastore.org/api/json/catalog.json",
1622                &gh_catalog_json(),
1623            ),
1624            (
1625                "https://www.schemastore.org/github-workflow.json",
1626                GH_WORKFLOW_SCHEMA,
1627            ),
1628        ]);
1629        let c = ValidateArgs {
1630            globs: vec![pattern],
1631            exclude: vec![],
1632            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1633            force_schema_fetch: true,
1634            force_validation: true,
1635            no_catalog: false,
1636            config_dir: None,
1637            schema_cache_ttl: None,
1638        };
1639        let result = run_with(&c, Some(client), |_| {}).await?;
1640        assert!(result.has_errors());
1641        Ok(())
1642    }
1643
1644    #[tokio::test]
1645    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1646        let tmp = tempfile::tempdir()?;
1647        let cache_tmp = tempfile::tempdir()?;
1648        let wf_dir = tmp.path().join(".github/workflows");
1649        fs::create_dir_all(&wf_dir)?;
1650        fs::write(
1651            wf_dir.join("ci.yml"),
1652            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1653        )?;
1654
1655        let client = mock(&[
1656            (
1657                "https://www.schemastore.org/api/json/catalog.json",
1658                &gh_catalog_json(),
1659            ),
1660            (
1661                "https://www.schemastore.org/github-workflow.json",
1662                GH_WORKFLOW_SCHEMA,
1663            ),
1664        ]);
1665        let c = ValidateArgs {
1666            globs: vec![],
1667            exclude: vec![],
1668            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1669            force_schema_fetch: true,
1670            force_validation: true,
1671            no_catalog: false,
1672            config_dir: None,
1673            schema_cache_ttl: None,
1674        };
1675
1676        let orig_dir = std::env::current_dir()?;
1677        std::env::set_current_dir(tmp.path())?;
1678        let result = run_with(&c, Some(client), |_| {}).await?;
1679        std::env::set_current_dir(orig_dir)?;
1680
1681        assert!(!result.has_errors());
1682        Ok(())
1683    }
1684
1685    // --- TOML tests ---
1686
1687    #[tokio::test]
1688    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1689        let tmp = tempfile::tempdir()?;
1690        let schema_path = tmp.path().join("schema.json");
1691        fs::write(&schema_path, SCHEMA)?;
1692
1693        let f = tmp.path().join("config.toml");
1694        fs::write(
1695            &f,
1696            format!(
1697                "# :schema {}\nname = \"hello\"\n",
1698                schema_path.to_string_lossy()
1699            ),
1700        )?;
1701
1702        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1703        let c = ValidateArgs {
1704            globs: vec![pattern],
1705            exclude: vec![],
1706            cache_dir: None,
1707            force_schema_fetch: true,
1708            force_validation: true,
1709            no_catalog: true,
1710            config_dir: None,
1711            schema_cache_ttl: None,
1712        };
1713        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1714        assert!(!result.has_errors());
1715        Ok(())
1716    }
1717
1718    // --- Rewrite rules + // resolution ---
1719
1720    #[tokio::test]
1721    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1722        let tmp = tempfile::tempdir()?;
1723
1724        let schemas_dir = tmp.path().join("schemas");
1725        fs::create_dir_all(&schemas_dir)?;
1726        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1727
1728        fs::write(
1729            tmp.path().join("lintel.toml"),
1730            r#"
1731[rewrite]
1732"http://localhost:9000/" = "//schemas/"
1733"#,
1734        )?;
1735
1736        let f = tmp.path().join("config.json");
1737        fs::write(
1738            &f,
1739            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1740        )?;
1741
1742        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1743        let c = ValidateArgs {
1744            globs: vec![pattern],
1745            exclude: vec![],
1746            cache_dir: None,
1747            force_schema_fetch: true,
1748            force_validation: true,
1749            no_catalog: true,
1750            config_dir: Some(tmp.path().to_path_buf()),
1751            schema_cache_ttl: None,
1752        };
1753
1754        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1755        assert!(!result.has_errors());
1756        assert_eq!(result.files_checked(), 1);
1757        Ok(())
1758    }
1759
1760    #[tokio::test]
1761    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1762        let tmp = tempfile::tempdir()?;
1763
1764        let schemas_dir = tmp.path().join("schemas");
1765        fs::create_dir_all(&schemas_dir)?;
1766        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1767
1768        fs::write(tmp.path().join("lintel.toml"), "")?;
1769
1770        let sub = tmp.path().join("deeply/nested");
1771        fs::create_dir_all(&sub)?;
1772        let f = sub.join("config.json");
1773        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1774
1775        let pattern = sub.join("*.json").to_string_lossy().to_string();
1776        let c = ValidateArgs {
1777            globs: vec![pattern],
1778            exclude: vec![],
1779            cache_dir: None,
1780            force_schema_fetch: true,
1781            force_validation: true,
1782            no_catalog: true,
1783            config_dir: Some(tmp.path().to_path_buf()),
1784            schema_cache_ttl: None,
1785        };
1786
1787        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1788        assert!(!result.has_errors());
1789        Ok(())
1790    }
1791
1792    // --- Format validation override ---
1793
    /// Schema whose `link` property carries a `format: uri-reference`
    /// constraint — used to exercise the `validate_formats` override.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1800
1801    #[tokio::test]
1802    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1803        let tmp = tempfile::tempdir()?;
1804        let schema_path = tmp.path().join("schema.json");
1805        fs::write(&schema_path, FORMAT_SCHEMA)?;
1806
1807        let f = tmp.path().join("data.json");
1808        fs::write(
1809            &f,
1810            format!(
1811                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1812                schema_path.to_string_lossy()
1813            ),
1814        )?;
1815
1816        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1817        let c = ValidateArgs {
1818            globs: vec![pattern],
1819            exclude: vec![],
1820            cache_dir: None,
1821            force_schema_fetch: true,
1822            force_validation: true,
1823            no_catalog: true,
1824            config_dir: Some(tmp.path().to_path_buf()),
1825            schema_cache_ttl: None,
1826        };
1827        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1828        assert!(
1829            result.has_errors(),
1830            "expected format error without override"
1831        );
1832        Ok(())
1833    }
1834
1835    #[tokio::test]
1836    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1837        let tmp = tempfile::tempdir()?;
1838        let schema_path = tmp.path().join("schema.json");
1839        fs::write(&schema_path, FORMAT_SCHEMA)?;
1840
1841        let f = tmp.path().join("data.json");
1842        fs::write(
1843            &f,
1844            format!(
1845                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1846                schema_path.to_string_lossy()
1847            ),
1848        )?;
1849
1850        // Use **/data.json to match the absolute path from the tempdir.
1851        fs::write(
1852            tmp.path().join("lintel.toml"),
1853            r#"
1854[[override]]
1855files = ["**/data.json"]
1856validate_formats = false
1857"#,
1858        )?;
1859
1860        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1861        let c = ValidateArgs {
1862            globs: vec![pattern],
1863            exclude: vec![],
1864            cache_dir: None,
1865            force_schema_fetch: true,
1866            force_validation: true,
1867            no_catalog: true,
1868            config_dir: Some(tmp.path().to_path_buf()),
1869            schema_cache_ttl: None,
1870        };
1871        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1872        assert!(
1873            !result.has_errors(),
1874            "expected no errors with validate_formats = false override"
1875        );
1876        Ok(())
1877    }
1878
1879    // --- Unrecognized extension handling ---
1880
1881    #[tokio::test]
1882    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1883        let tmp = tempfile::tempdir()?;
1884        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1885
1886        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1887        let c = ValidateArgs {
1888            globs: vec![pattern],
1889            exclude: vec![],
1890            cache_dir: None,
1891            force_schema_fetch: true,
1892            force_validation: true,
1893            no_catalog: true,
1894            config_dir: Some(tmp.path().to_path_buf()),
1895            schema_cache_ttl: None,
1896        };
1897        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1898        assert!(!result.has_errors());
1899        assert_eq!(result.files_checked(), 0);
1900        Ok(())
1901    }
1902
1903    #[tokio::test]
1904    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1905        let tmp = tempfile::tempdir()?;
1906        let cache_tmp = tempfile::tempdir()?;
1907        // File has .cfg extension (unrecognized) but content is valid JSON
1908        fs::write(
1909            tmp.path().join("myapp.cfg"),
1910            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1911        )?;
1912
1913        let catalog_json = r#"{"version":1,"schemas":[{
1914            "name": "MyApp Config",
1915            "description": "MyApp configuration",
1916            "url": "https://example.com/myapp.schema.json",
1917            "fileMatch": ["*.cfg"]
1918        }]}"#;
1919        let schema =
1920            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1921
1922        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1923        let client = mock(&[
1924            (
1925                "https://www.schemastore.org/api/json/catalog.json",
1926                catalog_json,
1927            ),
1928            ("https://example.com/myapp.schema.json", schema),
1929        ]);
1930        let c = ValidateArgs {
1931            globs: vec![pattern],
1932            exclude: vec![],
1933            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1934            force_schema_fetch: true,
1935            force_validation: true,
1936            no_catalog: false,
1937            config_dir: Some(tmp.path().to_path_buf()),
1938            schema_cache_ttl: None,
1939        };
1940        let result = run_with(&c, Some(client), |_| {}).await?;
1941        assert!(!result.has_errors());
1942        assert_eq!(result.files_checked(), 1);
1943        Ok(())
1944    }
1945
1946    #[tokio::test]
1947    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1948        let tmp = tempfile::tempdir()?;
1949        let cache_tmp = tempfile::tempdir()?;
1950        // File matches catalog but content isn't parseable by any format
1951        fs::write(
1952            tmp.path().join("myapp.cfg"),
1953            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1954        )?;
1955
1956        let catalog_json = r#"{"version":1,"schemas":[{
1957            "name": "MyApp Config",
1958            "description": "MyApp configuration",
1959            "url": "https://example.com/myapp.schema.json",
1960            "fileMatch": ["*.cfg"]
1961        }]}"#;
1962
1963        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1964        let client = mock(&[(
1965            "https://www.schemastore.org/api/json/catalog.json",
1966            catalog_json,
1967        )]);
1968        let c = ValidateArgs {
1969            globs: vec![pattern],
1970            exclude: vec![],
1971            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1972            force_schema_fetch: true,
1973            force_validation: true,
1974            no_catalog: false,
1975            config_dir: Some(tmp.path().to_path_buf()),
1976            schema_cache_ttl: None,
1977        };
1978        let result = run_with(&c, Some(client), |_| {}).await?;
1979        assert!(!result.has_errors());
1980        assert_eq!(result.files_checked(), 0);
1981        Ok(())
1982    }
1983
1984    #[tokio::test]
1985    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1986        let tmp = tempfile::tempdir()?;
1987        let cache_tmp = tempfile::tempdir()?;
1988        // File has .cfg extension, content is valid JSON but fails schema validation
1989        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1990
1991        let catalog_json = r#"{"version":1,"schemas":[{
1992            "name": "MyApp Config",
1993            "description": "MyApp configuration",
1994            "url": "https://example.com/myapp.schema.json",
1995            "fileMatch": ["*.cfg"]
1996        }]}"#;
1997        let schema =
1998            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1999
2000        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2001        let client = mock(&[
2002            (
2003                "https://www.schemastore.org/api/json/catalog.json",
2004                catalog_json,
2005            ),
2006            ("https://example.com/myapp.schema.json", schema),
2007        ]);
2008        let c = ValidateArgs {
2009            globs: vec![pattern],
2010            exclude: vec![],
2011            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2012            force_schema_fetch: true,
2013            force_validation: true,
2014            no_catalog: false,
2015            config_dir: Some(tmp.path().to_path_buf()),
2016            schema_cache_ttl: None,
2017        };
2018        let result = run_with(&c, Some(client), |_| {}).await?;
2019        assert!(result.has_errors());
2020        assert_eq!(result.files_checked(), 1);
2021        Ok(())
2022    }
2023
2024    // --- Validation cache ---
2025
2026    #[tokio::test]
2027    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
2028        let tmp = tempfile::tempdir()?;
2029        let schema_path = tmp.path().join("schema.json");
2030        fs::write(&schema_path, SCHEMA)?;
2031
2032        let f = tmp.path().join("valid.json");
2033        fs::write(
2034            &f,
2035            format!(
2036                r#"{{"$schema":"{}","name":"hello"}}"#,
2037                schema_path.to_string_lossy()
2038            ),
2039        )?;
2040
2041        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2042
2043        // First run: force_validation = false so results get cached
2044        let c = ValidateArgs {
2045            globs: vec![pattern.clone()],
2046            exclude: vec![],
2047            cache_dir: None,
2048            force_schema_fetch: true,
2049            force_validation: false,
2050            no_catalog: true,
2051            config_dir: None,
2052            schema_cache_ttl: None,
2053        };
2054        let mut first_statuses = Vec::new();
2055        let result = run_with(&c, Some(mock(&[])), |cf| {
2056            first_statuses.push(cf.validation_cache_status);
2057        })
2058        .await?;
2059        assert!(!result.has_errors());
2060        assert!(result.files_checked() > 0);
2061
2062        // Verify the first run recorded a validation cache miss
2063        assert!(
2064            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2065            "expected at least one validation cache miss on first run"
2066        );
2067
2068        // Second run: same file, same schema — should hit validation cache
2069        let mut second_statuses = Vec::new();
2070        let result = run_with(&c, Some(mock(&[])), |cf| {
2071            second_statuses.push(cf.validation_cache_status);
2072        })
2073        .await?;
2074        assert!(!result.has_errors());
2075
2076        // Verify the second run got a validation cache hit
2077        assert!(
2078            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2079            "expected at least one validation cache hit on second run"
2080        );
2081        Ok(())
2082    }
2083
2084    // --- clean_error_message ---
2085
2086    #[test]
2087    fn clean_strips_anyof_value() {
2088        let msg =
2089            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2090        assert_eq!(
2091            clean_error_message(msg.to_string()),
2092            "not valid under any of the schemas listed in the 'anyOf' keyword"
2093        );
2094    }
2095
2096    #[test]
2097    fn clean_strips_oneof_value() {
2098        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2099        assert_eq!(
2100            clean_error_message(msg.to_string()),
2101            "not valid under any of the schemas listed in the 'oneOf' keyword"
2102        );
2103    }
2104
2105    #[test]
2106    fn clean_strips_long_value() {
2107        let long_value = "x".repeat(5000);
2108        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2109        let msg = format!("{long_value}{suffix}");
2110        assert_eq!(
2111            clean_error_message(msg),
2112            "not valid under any of the schemas listed in the 'anyOf' keyword"
2113        );
2114    }
2115
2116    #[test]
2117    fn clean_preserves_type_error() {
2118        let msg = r#"12345 is not of types "null", "string""#;
2119        assert_eq!(clean_error_message(msg.to_string()), msg);
2120    }
2121
2122    #[test]
2123    fn clean_preserves_required_property() {
2124        let msg = "\"name\" is a required property";
2125        assert_eq!(clean_error_message(msg.to_string()), msg);
2126    }
2127
2128    /// Schemas whose URI contains a fragment (e.g. `…/draft-07/schema#`)
2129    /// must compile without error — the fragment is stripped before being
2130    /// used as the base URI for `$ref` resolution.
2131    #[tokio::test]
2132    async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2133        let tmp = tempfile::tempdir()?;
2134
2135        // A minimal draft-07 schema whose `$schema` ends with `#`.
2136        let schema_body = r#"{
2137            "$schema": "http://json-schema.org/draft-07/schema#",
2138            "type": "object",
2139            "properties": { "name": { "type": "string" } },
2140            "required": ["name"]
2141        }"#;
2142
2143        let schema_url = "http://json-schema.org/draft-07/schema#";
2144
2145        let f = tmp.path().join("data.json");
2146        fs::write(
2147            &f,
2148            format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2149        )?;
2150
2151        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2152        let client = mock(&[(
2153            // The schema URI with fragment — exactly as the `$schema` value appears.
2154            schema_url,
2155            schema_body,
2156        )]);
2157        let c = ValidateArgs {
2158            globs: vec![pattern],
2159            exclude: vec![],
2160            cache_dir: None,
2161            force_schema_fetch: true,
2162            force_validation: true,
2163            no_catalog: true,
2164            config_dir: None,
2165            schema_cache_ttl: None,
2166        };
2167        let result = run_with(&c, Some(client), |_| {}).await?;
2168        assert!(
2169            !result.has_errors(),
2170            "schema URI with fragment should not cause compilation error"
2171        );
2172        assert_eq!(result.files_checked(), 1);
2173        Ok(())
2174    }
2175
2176    #[tokio::test]
2177    async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2178        let tmp = tempfile::tempdir()?;
2179
2180        // Referenced schema with a "name" string definition
2181        std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2182
2183        // Main schema that uses a relative $ref
2184        let schema_path = tmp.path().join("schema.json");
2185        std::fs::write(
2186            &schema_path,
2187            r#"{
2188                "type": "object",
2189                "properties": {
2190                    "name": { "$ref": "./defs.json" }
2191                },
2192                "required": ["name"]
2193            }"#,
2194        )?;
2195
2196        // Valid data file pointing to the local schema
2197        let schema_uri = schema_path.to_string_lossy();
2198        std::fs::write(
2199            tmp.path().join("data.json"),
2200            format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2201        )?;
2202
2203        // Invalid data file (name should be a string per defs.json)
2204        std::fs::write(
2205            tmp.path().join("bad.json"),
2206            format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2207        )?;
2208
2209        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2210        let args = ValidateArgs {
2211            globs: vec![pattern],
2212            exclude: vec![],
2213            cache_dir: None,
2214            force_schema_fetch: true,
2215            force_validation: true,
2216            no_catalog: true,
2217            config_dir: None,
2218            schema_cache_ttl: None,
2219        };
2220        let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2221
2222        // The invalid file should produce an error (name is 42, not a string)
2223        assert!(result.has_errors());
2224        // Exactly one file should have errors (bad.json), the other (data.json) should pass
2225        assert_eq!(result.errors.len(), 1);
2226        Ok(())
2227    }
2228}