Skip to main content

lintel_validate/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog;
11use lintel_schema_cache::{CacheStatus, SchemaCache};
12use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
13use schema_catalog::{CompiledCatalog, FileFormat};
14
15use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
16use crate::discover;
17use crate::parsers::{self, Parser};
18use crate::registry;
19
/// Maximum number of file reads allowed in flight at the same time.
///
/// Conservative limit for concurrent file reads to avoid exhausting file
/// descriptors. 128 is well below the default soft limit on macOS (256) and
/// Linux (1024) while still providing good throughput.
///
/// Used as the permit count of the semaphore that guards the concurrent
/// reads in `parse_and_group_files`.
const FD_CONCURRENCY_LIMIT: usize = 128;
24
25/// Composite retriever that dispatches `file://` URIs to local disk reads
26/// and everything else to the HTTP-backed [`SchemaCache`].
27struct LocalRetriever {
28    http: SchemaCache,
29}
30
31#[async_trait::async_trait]
32impl jsonschema::AsyncRetrieve for LocalRetriever {
33    async fn retrieve(
34        &self,
35        uri: &jsonschema::Uri<String>,
36    ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
37        let s = uri.as_str();
38        if let Some(raw) = s.strip_prefix("file://") {
39            let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
40            let content = tokio::fs::read_to_string(path.as_ref()).await?;
41            Ok(serde_json::from_str(&content)?)
42        } else {
43            self.http.retrieve(uri).await
44        }
45    }
46}
47
/// Options controlling a validation run; consumed by [`run`] and [`run_with`].
pub struct ValidateArgs {
    /// Glob patterns (or directory paths) to find files.
    /// Empty = auto-discover starting from the current directory.
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas. The directory is created if it
    /// does not exist. Only used when [`run_with`] is not handed a pre-built
    /// schema cache.
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable catalog matching. When set, no catalog is fetched at all:
    /// neither custom registries, the default Lintel catalog, nor the
    /// `SchemaStore` catalog.
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
73
74/// Re-exported from [`crate::diagnostics::LintError`] so callers can use
75/// `lintel_validate::validate::LintError` without importing diagnostics.
76pub use crate::diagnostics::LintError;
77
/// A file that was checked and the schema it resolved to.
pub struct CheckedFile {
    /// Display path of the checked input. Entries produced from JSONL input
    /// use the form `path:line_number`, one entry per line.
    pub path: String,
    /// URI (or local path) of the schema the input was matched to.
    pub schema: String,
    /// `None` for local schemas and builtins; `Some` for remote schemas.
    pub cache_status: Option<CacheStatus>,
    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
87
88/// Result of a validation run.
89pub struct ValidateResult {
90    pub errors: Vec<LintError>,
91    pub checked: Vec<CheckedFile>,
92}
93
94impl ValidateResult {
95    pub fn has_errors(&self) -> bool {
96        !self.errors.is_empty()
97    }
98
99    pub fn files_checked(&self) -> usize {
100        self.checked.len()
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Internal types
106// ---------------------------------------------------------------------------
107
/// A file that has been parsed and matched to a schema URI.
///
/// For JSONL input, every line that resolves to a schema gets its own
/// `ParsedFile`.
struct ParsedFile {
    /// Display path used in diagnostics; `path:line_number` for JSONL lines.
    path: String,
    /// Raw source text that was parsed (a single line for JSONL input).
    content: String,
    /// The parsed document as a JSON value; this is what gets validated.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
116
117// ---------------------------------------------------------------------------
118// Config loading
119// ---------------------------------------------------------------------------
120
121/// Locate `lintel.toml`, load the full config, and return the config directory.
122/// Returns `(config, config_dir, config_path)`.  When no config is found or
123/// cwd is unavailable the config is default and `config_path` is `None`.
124#[tracing::instrument(skip_all)]
125pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
126    let start_dir = match search_dir {
127        Some(d) => d.to_path_buf(),
128        None => match std::env::current_dir() {
129            Ok(d) => d,
130            Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
131        },
132    };
133
134    let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
135        return (lintel_config::Config::default(), start_dir, None);
136    };
137
138    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
139    let cfg = lintel_config::find_and_load(&start_dir)
140        .ok()
141        .flatten()
142        .unwrap_or_default();
143    (cfg, dir, Some(config_path))
144}
145
146// ---------------------------------------------------------------------------
147// File collection
148// ---------------------------------------------------------------------------
149
150/// Collect input files from globs/directories, applying exclude filters.
151///
152/// # Errors
153///
154/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
155#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
156pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
157    if globs.is_empty() {
158        return discover::discover_files(".", exclude);
159    }
160
161    let mut result = Vec::new();
162    for pattern in globs {
163        let path = Path::new(pattern);
164        if path.is_dir() {
165            result.extend(discover::discover_files(pattern, exclude)?);
166        } else {
167            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
168                let path = entry?;
169                if path.is_file() && !is_excluded(&path, exclude) {
170                    result.push(path);
171                }
172            }
173        }
174    }
175    Ok(result)
176}
177
178fn is_excluded(path: &Path, excludes: &[String]) -> bool {
179    let path_str = match path.to_str() {
180        Some(s) => s.strip_prefix("./").unwrap_or(s),
181        None => return false,
182    };
183    excludes
184        .iter()
185        .any(|pattern| glob_match::glob_match(pattern, path_str))
186}
187
188// ---------------------------------------------------------------------------
189// Phase 1: Parse files and resolve schema URIs
190// ---------------------------------------------------------------------------
191
192/// Try parsing content with each known format, returning the first success.
193///
194/// JSONC is tried first (superset of JSON, handles comments), then YAML and
195/// TOML which cover the most common config formats, followed by the rest.
196pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
197    use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
198    const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
199
200    for fmt in FORMATS {
201        let parser = parsers::parser_for(fmt);
202        if let Ok(val) = parser.parse(content, file_name) {
203            return Some((fmt, val));
204        }
205    }
206    None
207}
208
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// The content parsed successfully and a schema was resolved for it.
    Parsed {
        /// Schema URI after rewrites and relative-path resolution; used as
        /// the key when grouping files by schema.
        schema_uri: String,
        /// The parsed content plus the metadata needed for diagnostics.
        parsed: ParsedFile,
    },
    /// Processing failed (e.g. a parse error or a JSONL `$schema` mismatch).
    Error(LintError),
    /// Nothing to validate: the file is unrecognized and unmapped, parsed to
    /// null, or no schema could be resolved for it.
    Skip,
}
219
/// Turn a relative local schema path into one anchored at `base_dir`.
///
/// `http://` and `https://` URIs pass through untouched. Any other value is
/// joined onto `base_dir` when one is given (the file's parent for inline
/// `$schema`, the config directory for config/catalog sources); without a
/// base directory the value is returned as-is.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    let is_remote = ["http://", "https://"]
        .iter()
        .any(|scheme| schema_uri.starts_with(*scheme));

    match base_dir {
        Some(dir) if !is_remote => dir.join(schema_uri).to_string_lossy().into_owned(),
        _ => schema_uri.to_owned(),
    }
}
235
236/// Process a single file's already-read content: parse and resolve schema URI.
237///
238/// Returns a `Vec` because JSONL files expand to one result per non-empty line.
239#[allow(clippy::too_many_arguments)]
240fn process_one_file(
241    path: &Path,
242    content: String,
243    config: &lintel_config::Config,
244    config_dir: &Path,
245    compiled_catalogs: &[CompiledCatalog],
246) -> Vec<FileResult> {
247    let path_str = path.display().to_string();
248    let file_name = path
249        .file_name()
250        .and_then(|n| n.to_str())
251        .unwrap_or(&path_str);
252
253    let detected_format = parsers::detect_format(path);
254
255    // JSONL files get special per-line handling.
256    if detected_format == Some(FileFormat::Jsonl) {
257        return process_jsonl_file(
258            path,
259            &path_str,
260            file_name,
261            &content,
262            config,
263            config_dir,
264            compiled_catalogs,
265        );
266    }
267
268    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
269    if detected_format.is_none() {
270        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
271            || compiled_catalogs
272                .iter()
273                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
274        if !has_match {
275            return vec![FileResult::Skip];
276        }
277    }
278
279    // Parse the file content.
280    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
281        let parser = parsers::parser_for(fmt);
282        match parser.parse(&content, &path_str) {
283            Ok(val) => (parser, val),
284            Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
285        }
286    } else {
287        match try_parse_all(&content, &path_str) {
288            Some((fmt, val)) => (parsers::parser_for(fmt), val),
289            None => return vec![FileResult::Skip],
290        }
291    };
292
293    // Skip markdown files with no frontmatter
294    if instance.is_null() {
295        return vec![FileResult::Skip];
296    }
297
298    // Schema resolution priority:
299    // 1. Inline $schema / YAML modeline (always wins)
300    // 2. Custom schema mappings from lintel.toml [schemas]
301    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
302    //
303    // Track whether the URI came from inline $schema (resolve relative to file)
304    // or from config/catalog (resolve relative to config dir).
305    let inline_uri = parser.extract_schema_uri(&content, &instance);
306    let from_inline = inline_uri.is_some();
307    let schema_uri = inline_uri
308        .or_else(|| {
309            config
310                .find_schema_mapping(&path_str, file_name)
311                .map(str::to_string)
312        })
313        .or_else(|| {
314            compiled_catalogs
315                .iter()
316                .find_map(|cat| cat.find_schema(&path_str, file_name))
317                .map(str::to_string)
318        });
319
320    let Some(schema_uri) = schema_uri else {
321        return vec![FileResult::Skip];
322    };
323
324    // Keep original URI for override matching (before rewrites)
325    let original_schema_uri = schema_uri.clone();
326
327    // Apply rewrite rules, then resolve // paths relative to lintel.toml
328    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
329    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
330
331    // Resolve relative local paths:
332    // - Inline $schema: relative to the file's parent directory
333    // - Config/catalog: relative to the config directory (where lintel.toml lives)
334    let schema_uri = resolve_local_schema_path(
335        &schema_uri,
336        if from_inline {
337            path.parent()
338        } else {
339            Some(config_dir)
340        },
341    );
342
343    vec![FileResult::Parsed {
344        schema_uri,
345        parsed: ParsedFile {
346            path: path_str,
347            content,
348            instance,
349            original_schema_uri,
350        },
351    }]
352}
353
354/// Process a JSONL file: parse each line independently and resolve schemas.
355///
356/// Each non-empty line becomes its own [`FileResult::Parsed`]. Schema resolution
357/// priority per line: inline `$schema` on the line > config mapping > catalog.
358///
359/// Also checks schema consistency across lines — mismatches are emitted as
360/// [`FileResult::Error`] so they flow through the normal Reporter pipeline.
361#[allow(clippy::too_many_arguments)]
362fn process_jsonl_file(
363    path: &Path,
364    path_str: &str,
365    file_name: &str,
366    content: &str,
367    config: &lintel_config::Config,
368    config_dir: &Path,
369    compiled_catalogs: &[CompiledCatalog],
370) -> Vec<FileResult> {
371    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
372        Ok(lines) => lines,
373        Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
374    };
375
376    if lines.is_empty() {
377        return vec![FileResult::Skip];
378    }
379
380    let mut results = Vec::with_capacity(lines.len());
381
382    // Check schema consistency before consuming lines.
383    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
384        for m in mismatches {
385            results.push(FileResult::Error(LintError::SchemaMismatch {
386                path: path_str.to_string(),
387                line_number: m.line_number,
388                message: format!("expected consistent $schema but found {}", m.schema_uri),
389            }));
390        }
391    }
392
393    for line in lines {
394        // Schema resolution: inline $schema on line > config > catalog
395        // Track source to resolve relative paths correctly.
396        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
397        let from_inline = inline_uri.is_some();
398        let schema_uri = inline_uri
399            .or_else(|| {
400                config
401                    .find_schema_mapping(path_str, file_name)
402                    .map(str::to_string)
403            })
404            .or_else(|| {
405                compiled_catalogs
406                    .iter()
407                    .find_map(|cat| cat.find_schema(path_str, file_name))
408                    .map(str::to_string)
409            });
410
411        let Some(schema_uri) = schema_uri else {
412            continue;
413        };
414
415        let original_schema_uri = schema_uri.clone();
416
417        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
418        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
419
420        // Inline $schema: relative to file's parent. Config/catalog: relative to config dir.
421        let schema_uri = resolve_local_schema_path(
422            &schema_uri,
423            if from_inline {
424                path.parent()
425            } else {
426                Some(config_dir)
427            },
428        );
429
430        let line_path = format!("{path_str}:{}", line.line_number);
431
432        results.push(FileResult::Parsed {
433            schema_uri,
434            parsed: ParsedFile {
435                path: line_path,
436                content: line.raw,
437                instance: line.value,
438                original_schema_uri,
439            },
440        });
441    }
442
443    if results.is_empty() {
444        vec![FileResult::Skip]
445    } else {
446        results
447    }
448}
449
450/// Read each file concurrently with tokio, parse its content, extract its
451/// schema URI, apply rewrites, and group by resolved schema URI.
452#[tracing::instrument(skip_all, fields(file_count = files.len()))]
453#[allow(clippy::too_many_arguments)]
454async fn parse_and_group_files(
455    files: &[PathBuf],
456    config: &lintel_config::Config,
457    config_dir: &Path,
458    compiled_catalogs: &[CompiledCatalog],
459    errors: &mut Vec<LintError>,
460) -> BTreeMap<String, Vec<ParsedFile>> {
461    // Read all files concurrently using tokio async I/O, with a semaphore
462    // to avoid exhausting file descriptors on large directories.
463    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
464    let mut read_set = tokio::task::JoinSet::new();
465    for path in files {
466        let path = path.clone();
467        let sem = semaphore.clone();
468        read_set.spawn(async move {
469            let _permit = sem.acquire().await.expect("semaphore closed");
470            let result = tokio::fs::read_to_string(&path).await;
471            (path, result)
472        });
473    }
474
475    let mut file_contents = Vec::with_capacity(files.len());
476    while let Some(result) = read_set.join_next().await {
477        match result {
478            Ok(item) => file_contents.push(item),
479            Err(e) => tracing::warn!("file read task panicked: {e}"),
480        }
481    }
482
483    // Process files: parse content and resolve schema URIs.
484    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
485    for (path, content_result) in file_contents {
486        let content = match content_result {
487            Ok(c) => c,
488            Err(e) => {
489                errors.push(LintError::Io {
490                    path: path.display().to_string(),
491                    message: format!("failed to read: {e}"),
492                });
493                continue;
494            }
495        };
496        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
497        for result in results {
498            match result {
499                FileResult::Parsed { schema_uri, parsed } => {
500                    schema_groups.entry(schema_uri).or_default().push(parsed);
501                }
502                FileResult::Error(e) => errors.push(e),
503                FileResult::Skip => {}
504            }
505        }
506    }
507
508    schema_groups
509}
510
511// ---------------------------------------------------------------------------
512// Phase 2: Schema fetching, compilation, and instance validation
513// ---------------------------------------------------------------------------
514
515/// Fetch a schema by URI, returning its parsed JSON and cache status.
516///
517/// For remote URIs, checks the prefetched map first; for local URIs, reads
518/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
519#[allow(clippy::too_many_arguments)]
520async fn fetch_schema_from_prefetched(
521    schema_uri: &str,
522    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
523    local_cache: &mut HashMap<String, Value>,
524    group: &[ParsedFile],
525    errors: &mut Vec<LintError>,
526    checked: &mut Vec<CheckedFile>,
527    on_check: &mut impl FnMut(&CheckedFile),
528) -> Option<(Value, Option<CacheStatus>)> {
529    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
530
531    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
532        match prefetched.get(schema_uri) {
533            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
534            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
535            None => Err(format!("schema not prefetched: {schema_uri}")),
536        }
537    } else if let Some(cached) = local_cache.get(schema_uri) {
538        Ok((cached.clone(), None))
539    } else {
540        tokio::fs::read_to_string(schema_uri)
541            .await
542            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
543            .and_then(|content| {
544                serde_json::from_str::<Value>(&content)
545                    .map(|v| {
546                        local_cache.insert(schema_uri.to_string(), v.clone());
547                        (v, None)
548                    })
549                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
550            })
551    };
552
553    match result {
554        Ok(value) => Some(value),
555        Err(message) => {
556            report_group_error(
557                |path| LintError::SchemaFetch {
558                    path: path.to_string(),
559                    message: message.clone(),
560                },
561                schema_uri,
562                None,
563                group,
564                errors,
565                checked,
566                on_check,
567            );
568            None
569        }
570    }
571}
572
573/// Report the same error for every file in a schema group.
574#[allow(clippy::too_many_arguments)]
575fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
576    make_error: impl Fn(&str) -> LintError,
577    schema_uri: &str,
578    cache_status: Option<CacheStatus>,
579    group: &[P],
580    errors: &mut Vec<LintError>,
581    checked: &mut Vec<CheckedFile>,
582    on_check: &mut impl FnMut(&CheckedFile),
583) {
584    for item in group {
585        let pf = item.borrow();
586        let cf = CheckedFile {
587            path: pf.path.clone(),
588            schema: schema_uri.to_string(),
589            cache_status,
590            validation_cache_status: None,
591        };
592        on_check(&cf);
593        checked.push(cf);
594        errors.push(make_error(&pf.path));
595    }
596}
597
598/// Mark every file in a group as checked (no errors).
599#[allow(clippy::too_many_arguments)]
600fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
601    schema_uri: &str,
602    cache_status: Option<CacheStatus>,
603    validation_cache_status: Option<ValidationCacheStatus>,
604    group: &[P],
605    checked: &mut Vec<CheckedFile>,
606    on_check: &mut impl FnMut(&CheckedFile),
607) {
608    for item in group {
609        let pf = item.borrow();
610        let cf = CheckedFile {
611            path: pf.path.clone(),
612            schema: schema_uri.to_string(),
613            cache_status,
614            validation_cache_status,
615        };
616        on_check(&cf);
617        checked.push(cf);
618    }
619}
620
/// Clean up error messages from the `jsonschema` crate.
///
/// For `anyOf`/`oneOf` failures the crate dumps the entire JSON value into the
/// message (e.g. `{...} is not valid under any of the schemas listed in the 'oneOf' keyword`).
/// The source snippet already shows the value, so we strip the redundant prefix
/// and keep only `"not valid under any of the schemas listed in the 'oneOf' keyword"`.
///
/// The dumped value comes first and may itself contain the marker text (for
/// example inside a string), so the *last* occurrence of the marker is the
/// one that belongs to the message proper.
///
/// All other messages are returned unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // Length of the leading " is ", dropped so the result starts at "not valid...".
    const SKIP: usize = " is ".len();

    match msg.rfind(MARKER) {
        Some(pos) => msg[pos + SKIP..].to_string(),
        None => msg,
    }
}
637
638/// Convert [`ValidationError`]s into [`LintError::Validation`] diagnostics.
639fn push_validation_errors(
640    pf: &ParsedFile,
641    schema_url: &str,
642    validation_errors: &[ValidationError],
643    errors: &mut Vec<LintError>,
644) {
645    for ve in validation_errors {
646        let span = find_instance_path_span(&pf.content, &ve.instance_path);
647        let instance_path = if ve.instance_path.is_empty() {
648            DEFAULT_LABEL.to_string()
649        } else {
650            ve.instance_path.clone()
651        };
652        let label = format_label(&instance_path, &ve.schema_path);
653        let source_span: miette::SourceSpan = span.into();
654        errors.push(LintError::Validation {
655            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
656            span: source_span,
657            schema_span: source_span,
658            path: pf.path.clone(),
659            instance_path,
660            label,
661            message: ve.message.clone(),
662            schema_url: schema_url.to_string(),
663            schema_path: ve.schema_path.clone(),
664        });
665    }
666}
667
668/// Validate all files in a group against an already-compiled validator and store
669/// results in the validation cache.
670#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
671#[allow(clippy::too_many_arguments)]
672async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
673    validator: &jsonschema::Validator,
674    schema_uri: &str,
675    schema_hash: &str,
676    validate_formats: bool,
677    cache_status: Option<CacheStatus>,
678    group: &[P],
679    vcache: &lintel_validation_cache::ValidationCache,
680    errors: &mut Vec<LintError>,
681    checked: &mut Vec<CheckedFile>,
682    on_check: &mut impl FnMut(&CheckedFile),
683) {
684    for item in group {
685        let pf = item.borrow();
686        let file_errors: Vec<ValidationError> = validator
687            .iter_errors(&pf.instance)
688            .map(|error| ValidationError {
689                instance_path: error.instance_path().to_string(),
690                message: clean_error_message(error.to_string()),
691                schema_path: error.schema_path().to_string(),
692            })
693            .collect();
694
695        vcache
696            .store(
697                &lintel_validation_cache::CacheKey {
698                    file_content: &pf.content,
699                    schema_hash,
700                    validate_formats,
701                },
702                &file_errors,
703            )
704            .await;
705        push_validation_errors(pf, schema_uri, &file_errors, errors);
706
707        let cf = CheckedFile {
708            path: pf.path.clone(),
709            schema: schema_uri.to_string(),
710            cache_status,
711            validation_cache_status: Some(ValidationCacheStatus::Miss),
712        };
713        on_check(&cf);
714        checked.push(cf);
715    }
716}
717
718// ---------------------------------------------------------------------------
719// Public API
720// ---------------------------------------------------------------------------
721
722/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
723///
724/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
725pub async fn fetch_compiled_catalogs(
726    retriever: &SchemaCache,
727    config: &lintel_config::Config,
728    no_catalog: bool,
729) -> Vec<CompiledCatalog> {
730    let mut compiled_catalogs = Vec::new();
731
732    if !no_catalog {
733        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
734
735        // Catalogs are fetched concurrently but sorted by priority so that
736        // the Lintel catalog wins over custom registries, which win over
737        // SchemaStore.  The `order` field encodes this precedence.
738        #[allow(clippy::items_after_statements)]
739        type CatalogResult = (
740            usize, // priority (lower = higher precedence)
741            String,
742            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
743        );
744        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
745
746        // Custom registries from lintel.toml (highest precedence among catalogs)
747        for (i, registry_url) in config.registries.iter().enumerate() {
748            let r = retriever.clone();
749            let url = registry_url.clone();
750            let label = format!("registry {url}");
751            catalog_tasks.spawn(async move {
752                let result = registry::fetch(&r, &url)
753                    .await
754                    .map(|cat| CompiledCatalog::compile(&cat));
755                (i, label, result)
756            });
757        }
758
759        // Lintel catalog
760        let lintel_order = config.registries.len();
761        if !config.no_default_catalog {
762            let r = retriever.clone();
763            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
764            catalog_tasks.spawn(async move {
765                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
766                    .await
767                    .map(|cat| CompiledCatalog::compile(&cat));
768                (lintel_order, label, result)
769            });
770        }
771
772        // SchemaStore catalog (lowest precedence)
773        let schemastore_order = config.registries.len() + 1;
774        let r = retriever.clone();
775        catalog_tasks.spawn(async move {
776            let result = catalog::fetch_catalog(&r)
777                .await
778                .map(|cat| CompiledCatalog::compile(&cat));
779            (schemastore_order, "SchemaStore catalog".to_string(), result)
780        });
781
782        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
783        while let Some(result) = catalog_tasks.join_next().await {
784            match result {
785                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
786                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
787                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
788            }
789        }
790        results.sort_by_key(|(order, _)| *order);
791        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));
792
793        drop(catalog_span);
794    }
795
796    compiled_catalogs
797}
798
799/// # Errors
800///
801/// Returns an error if file collection or schema validation encounters an I/O error.
802pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
803    run_with(args, None, |_| {}).await
804}
805
806/// Like [`run`], but calls `on_check` each time a file is checked, allowing
807/// callers to stream progress (e.g. verbose output) as files are processed.
808///
809/// # Errors
810///
811/// Returns an error if file collection or schema validation encounters an I/O error.
812#[tracing::instrument(skip_all, name = "validate")]
813#[allow(clippy::too_many_lines)]
814pub async fn run_with(
815    args: &ValidateArgs,
816    cache: Option<SchemaCache>,
817    mut on_check: impl FnMut(&CheckedFile),
818) -> Result<ValidateResult> {
819    let retriever = if let Some(c) = cache {
820        c
821    } else {
822        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
823        if let Some(dir) = &args.cache_dir {
824            let path = PathBuf::from(dir);
825            let _ = fs::create_dir_all(&path);
826            builder = builder.cache_dir(path);
827        }
828        if let Some(ttl) = args.schema_cache_ttl {
829            builder = builder.ttl(ttl);
830        }
831        builder.build()
832    };
833
834    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
835    let files = collect_files(&args.globs, &args.exclude)?;
836    tracing::info!(file_count = files.len(), "collected files");
837
838    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
839
840    let mut errors: Vec<LintError> = Vec::new();
841    let mut checked: Vec<CheckedFile> = Vec::new();
842
843    // Phase 1: Parse files and resolve schema URIs
844    let schema_groups = parse_and_group_files(
845        &files,
846        &config,
847        &config_dir,
848        &compiled_catalogs,
849        &mut errors,
850    )
851    .await;
852    tracing::info!(
853        schema_count = schema_groups.len(),
854        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
855        "grouped files by schema"
856    );
857
858    // Create validation cache
859    let vcache = lintel_validation_cache::ValidationCache::new(
860        lintel_validation_cache::ensure_cache_dir(),
861        args.force_validation,
862    );
863
864    // Prefetch all remote schemas in parallel
865    let remote_uris: Vec<&String> = schema_groups
866        .keys()
867        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
868        .collect();
869
870    let prefetched = {
871        let _prefetch_span =
872            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
873
874        let mut schema_tasks = tokio::task::JoinSet::new();
875        for uri in remote_uris {
876            let r = retriever.clone();
877            let u = uri.clone();
878            schema_tasks.spawn(async move {
879                let result = r.fetch(&u).await;
880                (u, result)
881            });
882        }
883
884        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
885        while let Some(result) = schema_tasks.join_next().await {
886            match result {
887                Ok((uri, fetch_result)) => {
888                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
889                }
890                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
891            }
892        }
893
894        prefetched
895    };
896
897    // Phase 2: Compile each schema once and validate all matching files
898    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
899    let mut fetch_time = core::time::Duration::ZERO;
900    let mut hash_time = core::time::Duration::ZERO;
901    let mut vcache_time = core::time::Duration::ZERO;
902    let mut compile_time = core::time::Duration::ZERO;
903    let mut validate_time = core::time::Duration::ZERO;
904
905    for (schema_uri, group) in &schema_groups {
906        let _group_span = tracing::debug_span!(
907            "schema_group",
908            schema = schema_uri.as_str(),
909            files = group.len(),
910        )
911        .entered();
912
913        // If ANY file in the group matches a `validate_formats = false` override,
914        // disable format validation for the whole group (they share one compiled validator).
915        let validate_formats = group.iter().all(|pf| {
916            config
917                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
918        });
919
920        // Remote schemas were prefetched in parallel above; local schemas are
921        // read from disk here (with in-memory caching).
922        let t = std::time::Instant::now();
923        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
924            schema_uri,
925            &prefetched,
926            &mut local_schema_cache,
927            group,
928            &mut errors,
929            &mut checked,
930            &mut on_check,
931        )
932        .await
933        else {
934            fetch_time += t.elapsed();
935            continue;
936        };
937        fetch_time += t.elapsed();
938
939        // Pre-compute schema hash once for the entire group.
940        let t = std::time::Instant::now();
941        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
942        hash_time += t.elapsed();
943
944        // Split the group into validation cache hits and misses.
945        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
946
947        let t = std::time::Instant::now();
948        for pf in group {
949            let (cached, vcache_status) = vcache
950                .lookup(&lintel_validation_cache::CacheKey {
951                    file_content: &pf.content,
952                    schema_hash: &schema_hash,
953                    validate_formats,
954                })
955                .await;
956
957            if let Some(cached_errors) = cached {
958                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
959                let cf = CheckedFile {
960                    path: pf.path.clone(),
961                    schema: schema_uri.clone(),
962                    cache_status,
963                    validation_cache_status: Some(vcache_status),
964                };
965                on_check(&cf);
966                checked.push(cf);
967            } else {
968                cache_misses.push(pf);
969            }
970        }
971        vcache_time += t.elapsed();
972
973        tracing::debug!(
974            cache_hits = group.len() - cache_misses.len(),
975            cache_misses = cache_misses.len(),
976            "validation cache"
977        );
978
979        // If all files hit the validation cache, skip schema compilation entirely.
980        if cache_misses.is_empty() {
981            continue;
982        }
983
984        // Compile the schema for cache misses.
985        let t = std::time::Instant::now();
986        let validator = {
987            // Set base URI so relative $ref values (e.g. "./rule.json") resolve
988            // correctly. Remote schemas use the HTTP URI directly; local schemas
989            // get a file:// URI derived from the canonical absolute path.
990            let is_remote_schema =
991                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
992            let local_retriever = LocalRetriever {
993                http: retriever.clone(),
994            };
995            let opts = jsonschema::async_options()
996                .with_retriever(local_retriever)
997                .should_validate_formats(validate_formats);
998            let base_uri = if is_remote_schema {
999                // Strip fragment (e.g. "#") — base URIs must not contain fragments.
1000                let uri = match schema_uri.find('#') {
1001                    Some(pos) => schema_uri[..pos].to_string(),
1002                    None => schema_uri.clone(),
1003                };
1004                Some(uri)
1005            } else {
1006                std::fs::canonicalize(schema_uri)
1007                    .ok()
1008                    .map(|p| format!("file://{}", p.display()))
1009            };
1010            let opts = if let Some(uri) = base_uri {
1011                opts.with_base_uri(uri)
1012            } else {
1013                opts
1014            };
1015            match opts.build(&schema_value).await {
1016                Ok(v) => v,
1017                Err(e) => {
1018                    compile_time += t.elapsed();
1019                    // When format validation is disabled and the compilation error
1020                    // is a uri-reference issue (e.g. Rust-style $ref paths in
1021                    // vector.json), skip validation silently.
1022                    if !validate_formats && e.to_string().contains("uri-reference") {
1023                        mark_group_checked(
1024                            schema_uri,
1025                            cache_status,
1026                            Some(ValidationCacheStatus::Miss),
1027                            &cache_misses,
1028                            &mut checked,
1029                            &mut on_check,
1030                        );
1031                        continue;
1032                    }
1033                    let msg = format!("failed to compile schema: {e}");
1034                    report_group_error(
1035                        |path| LintError::SchemaCompile {
1036                            path: path.to_string(),
1037                            message: msg.clone(),
1038                        },
1039                        schema_uri,
1040                        cache_status,
1041                        &cache_misses,
1042                        &mut errors,
1043                        &mut checked,
1044                        &mut on_check,
1045                    );
1046                    continue;
1047                }
1048            }
1049        };
1050        compile_time += t.elapsed();
1051
1052        let t = std::time::Instant::now();
1053        validate_group(
1054            &validator,
1055            schema_uri,
1056            &schema_hash,
1057            validate_formats,
1058            cache_status,
1059            &cache_misses,
1060            &vcache,
1061            &mut errors,
1062            &mut checked,
1063            &mut on_check,
1064        )
1065        .await;
1066        validate_time += t.elapsed();
1067    }
1068
1069    #[allow(clippy::cast_possible_truncation)]
1070    {
1071        tracing::info!(
1072            fetch_ms = fetch_time.as_millis() as u64,
1073            hash_ms = hash_time.as_millis() as u64,
1074            vcache_ms = vcache_time.as_millis() as u64,
1075            compile_ms = compile_time.as_millis() as u64,
1076            validate_ms = validate_time.as_millis() as u64,
1077            "phase2 breakdown"
1078        );
1079    }
1080
1081    // Sort errors for deterministic output (by path, then by span offset)
1082    errors.sort_by(|a, b| {
1083        a.path()
1084            .cmp(b.path())
1085            .then_with(|| a.offset().cmp(&b.offset()))
1086    });
1087
1088    Ok(ValidateResult { errors, checked })
1089}
1090
1091#[cfg(test)]
1092mod tests {
1093    use super::*;
1094    use lintel_schema_cache::SchemaCache;
1095    use std::path::Path;
1096
1097    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1098        let cache = SchemaCache::memory();
1099        for (uri, body) in entries {
1100            cache.insert(
1101                uri,
1102                serde_json::from_str(body).expect("test mock: invalid JSON"),
1103            );
1104        }
1105        cache
1106    }
1107
1108    fn testdata() -> PathBuf {
1109        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1110    }
1111
1112    /// Build glob patterns that scan one or more testdata directories for all supported file types.
1113    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1114        dirs.iter()
1115            .flat_map(|dir| {
1116                let base = testdata().join(dir);
1117                vec![
1118                    base.join("*.json").to_string_lossy().to_string(),
1119                    base.join("*.yaml").to_string_lossy().to_string(),
1120                    base.join("*.yml").to_string_lossy().to_string(),
1121                    base.join("*.json5").to_string_lossy().to_string(),
1122                    base.join("*.jsonc").to_string_lossy().to_string(),
1123                    base.join("*.toml").to_string_lossy().to_string(),
1124                ]
1125            })
1126            .collect()
1127    }
1128
1129    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1130        ValidateArgs {
1131            globs: scenario_globs(dirs),
1132            exclude: vec![],
1133            cache_dir: None,
1134            force_schema_fetch: true,
1135            force_validation: true,
1136            no_catalog: true,
1137            config_dir: None,
1138            schema_cache_ttl: None,
1139        }
1140    }
1141
1142    const SCHEMA: &str =
1143        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1144
1145    fn schema_mock() -> SchemaCache {
1146        mock(&[("https://example.com/schema.json", SCHEMA)])
1147    }
1148
1149    // --- Directory scanning tests ---
1150
1151    #[tokio::test]
1152    async fn no_matching_files() -> anyhow::Result<()> {
1153        let tmp = tempfile::tempdir()?;
1154        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1155        let c = ValidateArgs {
1156            globs: vec![pattern],
1157            exclude: vec![],
1158            cache_dir: None,
1159            force_schema_fetch: true,
1160            force_validation: true,
1161            no_catalog: true,
1162            config_dir: None,
1163            schema_cache_ttl: None,
1164        };
1165        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1166        assert!(!result.has_errors());
1167        Ok(())
1168    }
1169
1170    #[tokio::test]
1171    async fn dir_all_valid() -> anyhow::Result<()> {
1172        let c = args_for_dirs(&["positive_tests"]);
1173        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1174        assert!(!result.has_errors());
1175        Ok(())
1176    }
1177
1178    #[tokio::test]
1179    async fn dir_all_invalid() -> anyhow::Result<()> {
1180        let c = args_for_dirs(&["negative_tests"]);
1181        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1182        assert!(result.has_errors());
1183        Ok(())
1184    }
1185
1186    #[tokio::test]
1187    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1188        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1189        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1190        assert!(result.has_errors());
1191        Ok(())
1192    }
1193
1194    #[tokio::test]
1195    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1196        let c = args_for_dirs(&["no_schema"]);
1197        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1198        assert!(!result.has_errors());
1199        Ok(())
1200    }
1201
1202    #[tokio::test]
1203    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1204        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1205        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1206        assert!(!result.has_errors());
1207        Ok(())
1208    }
1209
1210    // --- Directory as positional arg ---
1211
1212    #[tokio::test]
1213    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1214        let dir = testdata().join("positive_tests");
1215        let c = ValidateArgs {
1216            globs: vec![dir.to_string_lossy().to_string()],
1217            exclude: vec![],
1218            cache_dir: None,
1219            force_schema_fetch: true,
1220            force_validation: true,
1221            no_catalog: true,
1222            config_dir: None,
1223            schema_cache_ttl: None,
1224        };
1225        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1226        assert!(!result.has_errors());
1227        assert!(result.files_checked() > 0);
1228        Ok(())
1229    }
1230
1231    #[tokio::test]
1232    async fn multiple_directory_args() -> anyhow::Result<()> {
1233        let pos_dir = testdata().join("positive_tests");
1234        let no_schema_dir = testdata().join("no_schema");
1235        let c = ValidateArgs {
1236            globs: vec![
1237                pos_dir.to_string_lossy().to_string(),
1238                no_schema_dir.to_string_lossy().to_string(),
1239            ],
1240            exclude: vec![],
1241            cache_dir: None,
1242            force_schema_fetch: true,
1243            force_validation: true,
1244            no_catalog: true,
1245            config_dir: None,
1246            schema_cache_ttl: None,
1247        };
1248        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1249        assert!(!result.has_errors());
1250        Ok(())
1251    }
1252
1253    #[tokio::test]
1254    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1255        let dir = testdata().join("positive_tests");
1256        let glob_pattern = testdata()
1257            .join("no_schema")
1258            .join("*.json")
1259            .to_string_lossy()
1260            .to_string();
1261        let c = ValidateArgs {
1262            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1263            exclude: vec![],
1264            cache_dir: None,
1265            force_schema_fetch: true,
1266            force_validation: true,
1267            no_catalog: true,
1268            config_dir: None,
1269            schema_cache_ttl: None,
1270        };
1271        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1272        assert!(!result.has_errors());
1273        Ok(())
1274    }
1275
1276    #[tokio::test]
1277    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1278        let base = testdata().join("malformed");
1279        let c = ValidateArgs {
1280            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1281            exclude: vec![],
1282            cache_dir: None,
1283            force_schema_fetch: true,
1284            force_validation: true,
1285            no_catalog: true,
1286            config_dir: None,
1287            schema_cache_ttl: None,
1288        };
1289        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1290        assert!(result.has_errors());
1291        Ok(())
1292    }
1293
1294    #[tokio::test]
1295    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1296        let base = testdata().join("malformed");
1297        let c = ValidateArgs {
1298            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1299            exclude: vec![],
1300            cache_dir: None,
1301            force_schema_fetch: true,
1302            force_validation: true,
1303            no_catalog: true,
1304            config_dir: None,
1305            schema_cache_ttl: None,
1306        };
1307        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1308        assert!(result.has_errors());
1309        Ok(())
1310    }
1311
1312    // --- Exclude filter ---
1313
1314    #[tokio::test]
1315    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1316        let base = testdata().join("negative_tests");
1317        let c = ValidateArgs {
1318            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1319            exclude: vec![
1320                base.join("missing_name.json").to_string_lossy().to_string(),
1321                base.join("missing_name.toml").to_string_lossy().to_string(),
1322                base.join("missing_name.yaml").to_string_lossy().to_string(),
1323            ],
1324            cache_dir: None,
1325            force_schema_fetch: true,
1326            force_validation: true,
1327            no_catalog: true,
1328            config_dir: None,
1329            schema_cache_ttl: None,
1330        };
1331        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1332        assert!(!result.has_errors());
1333        Ok(())
1334    }
1335
1336    // --- Cache options ---
1337
1338    #[tokio::test]
1339    async fn custom_cache_dir() -> anyhow::Result<()> {
1340        let c = ValidateArgs {
1341            globs: scenario_globs(&["positive_tests"]),
1342            exclude: vec![],
1343            cache_dir: None,
1344            force_schema_fetch: true,
1345            force_validation: true,
1346            no_catalog: true,
1347            config_dir: None,
1348            schema_cache_ttl: None,
1349        };
1350        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1351        assert!(!result.has_errors());
1352        Ok(())
1353    }
1354
1355    // --- Local schema ---
1356
1357    #[tokio::test]
1358    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1359        let tmp = tempfile::tempdir()?;
1360        let schema_path = tmp.path().join("schema.json");
1361        fs::write(&schema_path, SCHEMA)?;
1362
1363        let f = tmp.path().join("valid.json");
1364        fs::write(
1365            &f,
1366            format!(
1367                r#"{{"$schema":"{}","name":"hello"}}"#,
1368                schema_path.to_string_lossy()
1369            ),
1370        )?;
1371
1372        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1373        let c = ValidateArgs {
1374            globs: vec![pattern],
1375            exclude: vec![],
1376            cache_dir: None,
1377            force_schema_fetch: true,
1378            force_validation: true,
1379            no_catalog: true,
1380            config_dir: None,
1381            schema_cache_ttl: None,
1382        };
1383        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1384        assert!(!result.has_errors());
1385        Ok(())
1386    }
1387
1388    #[tokio::test]
1389    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1390        let tmp = tempfile::tempdir()?;
1391        let schema_path = tmp.path().join("schema.json");
1392        fs::write(&schema_path, SCHEMA)?;
1393
1394        let f = tmp.path().join("valid.yaml");
1395        fs::write(
1396            &f,
1397            format!(
1398                "# yaml-language-server: $schema={}\nname: hello\n",
1399                schema_path.to_string_lossy()
1400            ),
1401        )?;
1402
1403        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1404        let c = ValidateArgs {
1405            globs: vec![pattern],
1406            exclude: vec![],
1407            cache_dir: None,
1408            force_schema_fetch: true,
1409            force_validation: true,
1410            no_catalog: true,
1411            config_dir: None,
1412            schema_cache_ttl: None,
1413        };
1414        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1415        assert!(!result.has_errors());
1416        Ok(())
1417    }
1418
1419    #[tokio::test]
1420    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1421        let tmp = tempfile::tempdir()?;
1422        let f = tmp.path().join("ref.json");
1423        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1424
1425        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1426        let c = ValidateArgs {
1427            globs: vec![pattern],
1428            exclude: vec![],
1429            cache_dir: None,
1430            force_schema_fetch: true,
1431            force_validation: true,
1432            no_catalog: true,
1433            config_dir: None,
1434            schema_cache_ttl: None,
1435        };
1436        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1437        assert!(result.has_errors());
1438        Ok(())
1439    }
1440
1441    // --- JSON5 / JSONC tests ---
1442
1443    #[tokio::test]
1444    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1445        let tmp = tempfile::tempdir()?;
1446        let schema_path = tmp.path().join("schema.json");
1447        fs::write(&schema_path, SCHEMA)?;
1448
1449        let f = tmp.path().join("config.json5");
1450        fs::write(
1451            &f,
1452            format!(
1453                r#"{{
1454  // JSON5 comment
1455  "$schema": "{}",
1456  name: "hello",
1457}}"#,
1458                schema_path.to_string_lossy()
1459            ),
1460        )?;
1461
1462        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1463        let c = ValidateArgs {
1464            globs: vec![pattern],
1465            exclude: vec![],
1466            cache_dir: None,
1467            force_schema_fetch: true,
1468            force_validation: true,
1469            no_catalog: true,
1470            config_dir: None,
1471            schema_cache_ttl: None,
1472        };
1473        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1474        assert!(!result.has_errors());
1475        Ok(())
1476    }
1477
1478    #[tokio::test]
1479    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1480        let tmp = tempfile::tempdir()?;
1481        let schema_path = tmp.path().join("schema.json");
1482        fs::write(&schema_path, SCHEMA)?;
1483
1484        let f = tmp.path().join("config.jsonc");
1485        fs::write(
1486            &f,
1487            format!(
1488                r#"{{
1489  /* JSONC comment */
1490  "$schema": "{}",
1491  "name": "hello"
1492}}"#,
1493                schema_path.to_string_lossy()
1494            ),
1495        )?;
1496
1497        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1498        let c = ValidateArgs {
1499            globs: vec![pattern],
1500            exclude: vec![],
1501            cache_dir: None,
1502            force_schema_fetch: true,
1503            force_validation: true,
1504            no_catalog: true,
1505            config_dir: None,
1506            schema_cache_ttl: None,
1507        };
1508        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1509        assert!(!result.has_errors());
1510        Ok(())
1511    }
1512
1513    // --- Catalog-based schema matching ---
1514
1515    const GH_WORKFLOW_SCHEMA: &str = r#"{
1516        "type": "object",
1517        "properties": {
1518            "name": { "type": "string" },
1519            "on": {},
1520            "jobs": { "type": "object" }
1521        },
1522        "required": ["on", "jobs"]
1523    }"#;
1524
1525    fn gh_catalog_json() -> String {
1526        r#"{"version":1,"schemas":[{
1527            "name": "GitHub Workflow",
1528            "description": "GitHub Actions workflow",
1529            "url": "https://www.schemastore.org/github-workflow.json",
1530            "fileMatch": [
1531                "**/.github/workflows/*.yml",
1532                "**/.github/workflows/*.yaml"
1533            ]
1534        }]}"#
1535            .to_string()
1536    }
1537
1538    #[tokio::test]
1539    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1540        let tmp = tempfile::tempdir()?;
1541        let cache_tmp = tempfile::tempdir()?;
1542        let wf_dir = tmp.path().join(".github/workflows");
1543        fs::create_dir_all(&wf_dir)?;
1544        fs::write(
1545            wf_dir.join("ci.yml"),
1546            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1547        )?;
1548
1549        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1550        let client = mock(&[
1551            (
1552                "https://www.schemastore.org/api/json/catalog.json",
1553                &gh_catalog_json(),
1554            ),
1555            (
1556                "https://www.schemastore.org/github-workflow.json",
1557                GH_WORKFLOW_SCHEMA,
1558            ),
1559        ]);
1560        let c = ValidateArgs {
1561            globs: vec![pattern],
1562            exclude: vec![],
1563            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1564            force_schema_fetch: true,
1565            force_validation: true,
1566            no_catalog: false,
1567            config_dir: None,
1568            schema_cache_ttl: None,
1569        };
1570        let result = run_with(&c, Some(client), |_| {}).await?;
1571        assert!(!result.has_errors());
1572        Ok(())
1573    }
1574
1575    #[tokio::test]
1576    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1577        let tmp = tempfile::tempdir()?;
1578        let cache_tmp = tempfile::tempdir()?;
1579        let wf_dir = tmp.path().join(".github/workflows");
1580        fs::create_dir_all(&wf_dir)?;
1581        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1582
1583        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1584        let client = mock(&[
1585            (
1586                "https://www.schemastore.org/api/json/catalog.json",
1587                &gh_catalog_json(),
1588            ),
1589            (
1590                "https://www.schemastore.org/github-workflow.json",
1591                GH_WORKFLOW_SCHEMA,
1592            ),
1593        ]);
1594        let c = ValidateArgs {
1595            globs: vec![pattern],
1596            exclude: vec![],
1597            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1598            force_schema_fetch: true,
1599            force_validation: true,
1600            no_catalog: false,
1601            config_dir: None,
1602            schema_cache_ttl: None,
1603        };
1604        let result = run_with(&c, Some(client), |_| {}).await?;
1605        assert!(result.has_errors());
1606        Ok(())
1607    }
1608
1609    #[tokio::test]
1610    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1611        let tmp = tempfile::tempdir()?;
1612        let cache_tmp = tempfile::tempdir()?;
1613        let wf_dir = tmp.path().join(".github/workflows");
1614        fs::create_dir_all(&wf_dir)?;
1615        fs::write(
1616            wf_dir.join("ci.yml"),
1617            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1618        )?;
1619
1620        let client = mock(&[
1621            (
1622                "https://www.schemastore.org/api/json/catalog.json",
1623                &gh_catalog_json(),
1624            ),
1625            (
1626                "https://www.schemastore.org/github-workflow.json",
1627                GH_WORKFLOW_SCHEMA,
1628            ),
1629        ]);
1630        let c = ValidateArgs {
1631            globs: vec![],
1632            exclude: vec![],
1633            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1634            force_schema_fetch: true,
1635            force_validation: true,
1636            no_catalog: false,
1637            config_dir: None,
1638            schema_cache_ttl: None,
1639        };
1640
1641        let orig_dir = std::env::current_dir()?;
1642        std::env::set_current_dir(tmp.path())?;
1643        let result = run_with(&c, Some(client), |_| {}).await?;
1644        std::env::set_current_dir(orig_dir)?;
1645
1646        assert!(!result.has_errors());
1647        Ok(())
1648    }
1649
1650    // --- TOML tests ---
1651
1652    #[tokio::test]
1653    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1654        let tmp = tempfile::tempdir()?;
1655        let schema_path = tmp.path().join("schema.json");
1656        fs::write(&schema_path, SCHEMA)?;
1657
1658        let f = tmp.path().join("config.toml");
1659        fs::write(
1660            &f,
1661            format!(
1662                "# :schema {}\nname = \"hello\"\n",
1663                schema_path.to_string_lossy()
1664            ),
1665        )?;
1666
1667        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1668        let c = ValidateArgs {
1669            globs: vec![pattern],
1670            exclude: vec![],
1671            cache_dir: None,
1672            force_schema_fetch: true,
1673            force_validation: true,
1674            no_catalog: true,
1675            config_dir: None,
1676            schema_cache_ttl: None,
1677        };
1678        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1679        assert!(!result.has_errors());
1680        Ok(())
1681    }
1682
1683    // --- Rewrite rules + // resolution ---
1684
1685    #[tokio::test]
1686    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1687        let tmp = tempfile::tempdir()?;
1688
1689        let schemas_dir = tmp.path().join("schemas");
1690        fs::create_dir_all(&schemas_dir)?;
1691        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1692
1693        fs::write(
1694            tmp.path().join("lintel.toml"),
1695            r#"
1696[rewrite]
1697"http://localhost:9000/" = "//schemas/"
1698"#,
1699        )?;
1700
1701        let f = tmp.path().join("config.json");
1702        fs::write(
1703            &f,
1704            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1705        )?;
1706
1707        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1708        let c = ValidateArgs {
1709            globs: vec![pattern],
1710            exclude: vec![],
1711            cache_dir: None,
1712            force_schema_fetch: true,
1713            force_validation: true,
1714            no_catalog: true,
1715            config_dir: Some(tmp.path().to_path_buf()),
1716            schema_cache_ttl: None,
1717        };
1718
1719        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1720        assert!(!result.has_errors());
1721        assert_eq!(result.files_checked(), 1);
1722        Ok(())
1723    }
1724
1725    #[tokio::test]
1726    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1727        let tmp = tempfile::tempdir()?;
1728
1729        let schemas_dir = tmp.path().join("schemas");
1730        fs::create_dir_all(&schemas_dir)?;
1731        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1732
1733        fs::write(tmp.path().join("lintel.toml"), "")?;
1734
1735        let sub = tmp.path().join("deeply/nested");
1736        fs::create_dir_all(&sub)?;
1737        let f = sub.join("config.json");
1738        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1739
1740        let pattern = sub.join("*.json").to_string_lossy().to_string();
1741        let c = ValidateArgs {
1742            globs: vec![pattern],
1743            exclude: vec![],
1744            cache_dir: None,
1745            force_schema_fetch: true,
1746            force_validation: true,
1747            no_catalog: true,
1748            config_dir: Some(tmp.path().to_path_buf()),
1749            schema_cache_ttl: None,
1750        };
1751
1752        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1753        assert!(!result.has_errors());
1754        Ok(())
1755    }
1756
1757    // --- Format validation override ---
1758
    /// Schema shared by the format-validation tests: an object whose
    /// optional `link` property is a string carrying the `uri-reference`
    /// format annotation.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1765
1766    #[tokio::test]
1767    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1768        let tmp = tempfile::tempdir()?;
1769        let schema_path = tmp.path().join("schema.json");
1770        fs::write(&schema_path, FORMAT_SCHEMA)?;
1771
1772        let f = tmp.path().join("data.json");
1773        fs::write(
1774            &f,
1775            format!(
1776                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1777                schema_path.to_string_lossy()
1778            ),
1779        )?;
1780
1781        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1782        let c = ValidateArgs {
1783            globs: vec![pattern],
1784            exclude: vec![],
1785            cache_dir: None,
1786            force_schema_fetch: true,
1787            force_validation: true,
1788            no_catalog: true,
1789            config_dir: Some(tmp.path().to_path_buf()),
1790            schema_cache_ttl: None,
1791        };
1792        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1793        assert!(
1794            result.has_errors(),
1795            "expected format error without override"
1796        );
1797        Ok(())
1798    }
1799
1800    #[tokio::test]
1801    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1802        let tmp = tempfile::tempdir()?;
1803        let schema_path = tmp.path().join("schema.json");
1804        fs::write(&schema_path, FORMAT_SCHEMA)?;
1805
1806        let f = tmp.path().join("data.json");
1807        fs::write(
1808            &f,
1809            format!(
1810                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1811                schema_path.to_string_lossy()
1812            ),
1813        )?;
1814
1815        // Use **/data.json to match the absolute path from the tempdir.
1816        fs::write(
1817            tmp.path().join("lintel.toml"),
1818            r#"
1819[[override]]
1820files = ["**/data.json"]
1821validate_formats = false
1822"#,
1823        )?;
1824
1825        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1826        let c = ValidateArgs {
1827            globs: vec![pattern],
1828            exclude: vec![],
1829            cache_dir: None,
1830            force_schema_fetch: true,
1831            force_validation: true,
1832            no_catalog: true,
1833            config_dir: Some(tmp.path().to_path_buf()),
1834            schema_cache_ttl: None,
1835        };
1836        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1837        assert!(
1838            !result.has_errors(),
1839            "expected no errors with validate_formats = false override"
1840        );
1841        Ok(())
1842    }
1843
1844    // --- Unrecognized extension handling ---
1845
1846    #[tokio::test]
1847    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1848        let tmp = tempfile::tempdir()?;
1849        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1850
1851        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1852        let c = ValidateArgs {
1853            globs: vec![pattern],
1854            exclude: vec![],
1855            cache_dir: None,
1856            force_schema_fetch: true,
1857            force_validation: true,
1858            no_catalog: true,
1859            config_dir: Some(tmp.path().to_path_buf()),
1860            schema_cache_ttl: None,
1861        };
1862        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1863        assert!(!result.has_errors());
1864        assert_eq!(result.files_checked(), 0);
1865        Ok(())
1866    }
1867
1868    #[tokio::test]
1869    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1870        let tmp = tempfile::tempdir()?;
1871        let cache_tmp = tempfile::tempdir()?;
1872        // File has .cfg extension (unrecognized) but content is valid JSON
1873        fs::write(
1874            tmp.path().join("myapp.cfg"),
1875            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1876        )?;
1877
1878        let catalog_json = r#"{"version":1,"schemas":[{
1879            "name": "MyApp Config",
1880            "description": "MyApp configuration",
1881            "url": "https://example.com/myapp.schema.json",
1882            "fileMatch": ["*.cfg"]
1883        }]}"#;
1884        let schema =
1885            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1886
1887        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1888        let client = mock(&[
1889            (
1890                "https://www.schemastore.org/api/json/catalog.json",
1891                catalog_json,
1892            ),
1893            ("https://example.com/myapp.schema.json", schema),
1894        ]);
1895        let c = ValidateArgs {
1896            globs: vec![pattern],
1897            exclude: vec![],
1898            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1899            force_schema_fetch: true,
1900            force_validation: true,
1901            no_catalog: false,
1902            config_dir: Some(tmp.path().to_path_buf()),
1903            schema_cache_ttl: None,
1904        };
1905        let result = run_with(&c, Some(client), |_| {}).await?;
1906        assert!(!result.has_errors());
1907        assert_eq!(result.files_checked(), 1);
1908        Ok(())
1909    }
1910
1911    #[tokio::test]
1912    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1913        let tmp = tempfile::tempdir()?;
1914        let cache_tmp = tempfile::tempdir()?;
1915        // File matches catalog but content isn't parseable by any format
1916        fs::write(
1917            tmp.path().join("myapp.cfg"),
1918            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1919        )?;
1920
1921        let catalog_json = r#"{"version":1,"schemas":[{
1922            "name": "MyApp Config",
1923            "description": "MyApp configuration",
1924            "url": "https://example.com/myapp.schema.json",
1925            "fileMatch": ["*.cfg"]
1926        }]}"#;
1927
1928        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1929        let client = mock(&[(
1930            "https://www.schemastore.org/api/json/catalog.json",
1931            catalog_json,
1932        )]);
1933        let c = ValidateArgs {
1934            globs: vec![pattern],
1935            exclude: vec![],
1936            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1937            force_schema_fetch: true,
1938            force_validation: true,
1939            no_catalog: false,
1940            config_dir: Some(tmp.path().to_path_buf()),
1941            schema_cache_ttl: None,
1942        };
1943        let result = run_with(&c, Some(client), |_| {}).await?;
1944        assert!(!result.has_errors());
1945        assert_eq!(result.files_checked(), 0);
1946        Ok(())
1947    }
1948
1949    #[tokio::test]
1950    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1951        let tmp = tempfile::tempdir()?;
1952        let cache_tmp = tempfile::tempdir()?;
1953        // File has .cfg extension, content is valid JSON but fails schema validation
1954        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1955
1956        let catalog_json = r#"{"version":1,"schemas":[{
1957            "name": "MyApp Config",
1958            "description": "MyApp configuration",
1959            "url": "https://example.com/myapp.schema.json",
1960            "fileMatch": ["*.cfg"]
1961        }]}"#;
1962        let schema =
1963            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1964
1965        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1966        let client = mock(&[
1967            (
1968                "https://www.schemastore.org/api/json/catalog.json",
1969                catalog_json,
1970            ),
1971            ("https://example.com/myapp.schema.json", schema),
1972        ]);
1973        let c = ValidateArgs {
1974            globs: vec![pattern],
1975            exclude: vec![],
1976            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1977            force_schema_fetch: true,
1978            force_validation: true,
1979            no_catalog: false,
1980            config_dir: Some(tmp.path().to_path_buf()),
1981            schema_cache_ttl: None,
1982        };
1983        let result = run_with(&c, Some(client), |_| {}).await?;
1984        assert!(result.has_errors());
1985        assert_eq!(result.files_checked(), 1);
1986        Ok(())
1987    }
1988
1989    // --- Validation cache ---
1990
1991    #[tokio::test]
1992    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1993        let tmp = tempfile::tempdir()?;
1994        let schema_path = tmp.path().join("schema.json");
1995        fs::write(&schema_path, SCHEMA)?;
1996
1997        let f = tmp.path().join("valid.json");
1998        fs::write(
1999            &f,
2000            format!(
2001                r#"{{"$schema":"{}","name":"hello"}}"#,
2002                schema_path.to_string_lossy()
2003            ),
2004        )?;
2005
2006        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2007
2008        // First run: force_validation = false so results get cached
2009        let c = ValidateArgs {
2010            globs: vec![pattern.clone()],
2011            exclude: vec![],
2012            cache_dir: None,
2013            force_schema_fetch: true,
2014            force_validation: false,
2015            no_catalog: true,
2016            config_dir: None,
2017            schema_cache_ttl: None,
2018        };
2019        let mut first_statuses = Vec::new();
2020        let result = run_with(&c, Some(mock(&[])), |cf| {
2021            first_statuses.push(cf.validation_cache_status);
2022        })
2023        .await?;
2024        assert!(!result.has_errors());
2025        assert!(result.files_checked() > 0);
2026
2027        // Verify the first run recorded a validation cache miss
2028        assert!(
2029            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2030            "expected at least one validation cache miss on first run"
2031        );
2032
2033        // Second run: same file, same schema — should hit validation cache
2034        let mut second_statuses = Vec::new();
2035        let result = run_with(&c, Some(mock(&[])), |cf| {
2036            second_statuses.push(cf.validation_cache_status);
2037        })
2038        .await?;
2039        assert!(!result.has_errors());
2040
2041        // Verify the second run got a validation cache hit
2042        assert!(
2043            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2044            "expected at least one validation cache hit on second run"
2045        );
2046        Ok(())
2047    }
2048
2049    // --- clean_error_message ---
2050
2051    #[test]
2052    fn clean_strips_anyof_value() {
2053        let msg =
2054            r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2055        assert_eq!(
2056            clean_error_message(msg.to_string()),
2057            "not valid under any of the schemas listed in the 'anyOf' keyword"
2058        );
2059    }
2060
2061    #[test]
2062    fn clean_strips_oneof_value() {
2063        let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2064        assert_eq!(
2065            clean_error_message(msg.to_string()),
2066            "not valid under any of the schemas listed in the 'oneOf' keyword"
2067        );
2068    }
2069
2070    #[test]
2071    fn clean_strips_long_value() {
2072        let long_value = "x".repeat(5000);
2073        let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2074        let msg = format!("{long_value}{suffix}");
2075        assert_eq!(
2076            clean_error_message(msg),
2077            "not valid under any of the schemas listed in the 'anyOf' keyword"
2078        );
2079    }
2080
2081    #[test]
2082    fn clean_preserves_type_error() {
2083        let msg = r#"12345 is not of types "null", "string""#;
2084        assert_eq!(clean_error_message(msg.to_string()), msg);
2085    }
2086
2087    #[test]
2088    fn clean_preserves_required_property() {
2089        let msg = "\"name\" is a required property";
2090        assert_eq!(clean_error_message(msg.to_string()), msg);
2091    }
2092
2093    /// Schemas whose URI contains a fragment (e.g. `…/draft-07/schema#`)
2094    /// must compile without error — the fragment is stripped before being
2095    /// used as the base URI for `$ref` resolution.
2096    #[tokio::test]
2097    async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2098        let tmp = tempfile::tempdir()?;
2099
2100        // A minimal draft-07 schema whose `$schema` ends with `#`.
2101        let schema_body = r#"{
2102            "$schema": "http://json-schema.org/draft-07/schema#",
2103            "type": "object",
2104            "properties": { "name": { "type": "string" } },
2105            "required": ["name"]
2106        }"#;
2107
2108        let schema_url = "http://json-schema.org/draft-07/schema#";
2109
2110        let f = tmp.path().join("data.json");
2111        fs::write(
2112            &f,
2113            format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2114        )?;
2115
2116        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2117        let client = mock(&[(
2118            // The schema URI with fragment — exactly as the `$schema` value appears.
2119            schema_url,
2120            schema_body,
2121        )]);
2122        let c = ValidateArgs {
2123            globs: vec![pattern],
2124            exclude: vec![],
2125            cache_dir: None,
2126            force_schema_fetch: true,
2127            force_validation: true,
2128            no_catalog: true,
2129            config_dir: None,
2130            schema_cache_ttl: None,
2131        };
2132        let result = run_with(&c, Some(client), |_| {}).await?;
2133        assert!(
2134            !result.has_errors(),
2135            "schema URI with fragment should not cause compilation error"
2136        );
2137        assert_eq!(result.files_checked(), 1);
2138        Ok(())
2139    }
2140
2141    #[tokio::test]
2142    async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2143        let tmp = tempfile::tempdir()?;
2144
2145        // Referenced schema with a "name" string definition
2146        std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2147
2148        // Main schema that uses a relative $ref
2149        let schema_path = tmp.path().join("schema.json");
2150        std::fs::write(
2151            &schema_path,
2152            r#"{
2153                "type": "object",
2154                "properties": {
2155                    "name": { "$ref": "./defs.json" }
2156                },
2157                "required": ["name"]
2158            }"#,
2159        )?;
2160
2161        // Valid data file pointing to the local schema
2162        let schema_uri = schema_path.to_string_lossy();
2163        std::fs::write(
2164            tmp.path().join("data.json"),
2165            format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2166        )?;
2167
2168        // Invalid data file (name should be a string per defs.json)
2169        std::fs::write(
2170            tmp.path().join("bad.json"),
2171            format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2172        )?;
2173
2174        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2175        let args = ValidateArgs {
2176            globs: vec![pattern],
2177            exclude: vec![],
2178            cache_dir: None,
2179            force_schema_fetch: true,
2180            force_validation: true,
2181            no_catalog: true,
2182            config_dir: None,
2183            schema_cache_ttl: None,
2184        };
2185        let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2186
2187        // The invalid file should produce an error (name is 42, not a string)
2188        assert!(result.has_errors());
2189        // Exactly one file should have errors (bad.json), the other (data.json) should pass
2190        assert_eq!(result.errors.len(), 1);
2191        Ok(())
2192    }
2193}