Skip to main content

lintel_validate/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::Result;
7use serde_json::Value;
8
9use lintel_diagnostics::reporter::{CheckResult, CheckedFile};
10use lintel_diagnostics::{
11    DEFAULT_LABEL, LintelDiagnostic, ValidationDiagnostic, find_instance_path_span, format_label,
12};
13use lintel_schema_cache::{CacheStatus, SchemaCache};
14use lintel_validation_cache::{ValidationCacheStatus, ValidationError, ValidationErrorKind};
15use schema_catalog::{CompiledCatalog, FileFormat};
16
17use crate::catalog;
18use crate::parsers::{self, Parser};
19use crate::registry;
20use crate::suggest;
21
/// Upper bound on the number of files read concurrently.
///
/// Deliberately conservative so that file descriptors are not exhausted: 128
/// sits comfortably under the default soft limit on macOS (256) and Linux
/// (1024) while still giving good throughput.
const FD_CONCURRENCY_LIMIT: usize = 128;
26
27/// Composite retriever that dispatches `file://` URIs to local disk reads
28/// and everything else to the HTTP-backed [`SchemaCache`].
29struct LocalRetriever {
30    http: SchemaCache,
31}
32
33#[async_trait::async_trait]
34impl jsonschema::AsyncRetrieve for LocalRetriever {
35    async fn retrieve(
36        &self,
37        uri: &jsonschema::Uri<String>,
38    ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
39        let s = uri.as_str();
40        if let Some(raw) = s.strip_prefix("file://") {
41            let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
42            let content = tokio::fs::read_to_string(path.as_ref()).await?;
43            Ok(serde_json::from_str(&content)?)
44        } else {
45            self.http.retrieve(uri).await
46        }
47    }
48}
49
/// Options controlling a validation run: which files to check, how the schema
/// and validation caches are used, and where configuration is looked up.
#[derive(Debug)]
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover)
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
75
76// ---------------------------------------------------------------------------
77// Internal types
78// ---------------------------------------------------------------------------
79
80/// A file that has been parsed and matched to a schema URI.
81struct ParsedFile {
82    path: String,
83    content: String,
84    instance: Value,
85    /// Original schema URI before rewrites (for override matching).
86    original_schema_uri: String,
87}
88
89// ---------------------------------------------------------------------------
90// Config loading
91// ---------------------------------------------------------------------------
92
93/// Locate `lintel.toml`, load the full config, and return the config directory.
94/// Returns `(config, config_dir, config_path)`.  When no config is found or
95/// cwd is unavailable the config is default and `config_path` is `None`.
96#[tracing::instrument(skip_all)]
97pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
98    let start_dir = match search_dir {
99        Some(d) => d.to_path_buf(),
100        None => match std::env::current_dir() {
101            Ok(d) => d,
102            Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
103        },
104    };
105
106    let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
107        return (lintel_config::Config::default(), start_dir, None);
108    };
109
110    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
111    let cfg = lintel_config::find_and_load(&start_dir)
112        .ok()
113        .flatten()
114        .unwrap_or_default();
115    (cfg, dir, Some(config_path))
116}
117
118// ---------------------------------------------------------------------------
119// File collection
120// ---------------------------------------------------------------------------
121
122/// Collect input files from globs/directories, applying exclude filters.
123///
124/// # Errors
125///
126/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
127#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
128pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
129    lintel_config::discover::collect_files(globs, exclude, |p| parsers::detect_format(p).is_some())
130}
131
132// ---------------------------------------------------------------------------
133// Phase 1: Parse files and resolve schema URIs
134// ---------------------------------------------------------------------------
135
136/// Try parsing content with each known format, returning the first success.
137///
138/// JSONC is tried first (superset of JSON, handles comments), then YAML and
139/// TOML which cover the most common config formats, followed by the rest.
140pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
141    use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
142    const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
143
144    for fmt in FORMATS {
145        let parser = parsers::parser_for(fmt);
146        if let Ok(val) = parser.parse(content, file_name) {
147            return Some((fmt, val));
148        }
149    }
150    None
151}
152
153/// Result of processing a single file: either a parsed file with its schema URI,
154/// a lint error, or nothing (file was skipped).
155enum FileResult {
156    Parsed {
157        schema_uri: String,
158        parsed: ParsedFile,
159    },
160    Error(LintelDiagnostic),
161    Skip,
162}
163
164/// Resolve a relative local schema path against a base directory.
165///
166/// Remote URIs (http/https) are returned unchanged. For local paths, joins with
167/// the provided base directory (file's parent for inline `$schema`, config dir
168/// for config/catalog sources).
169fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
170    if schema_uri.starts_with("http://") || schema_uri.starts_with("https://") {
171        return schema_uri.to_string();
172    }
173    if let Some(dir) = base_dir {
174        normalize_path(&dir.join(schema_uri))
175            .to_string_lossy()
176            .to_string()
177    } else {
178        schema_uri.to_string()
179    }
180}
181
/// Normalize a path by resolving `.` and `..` components without touching the
/// filesystem (unlike `std::fs::canonicalize`).
///
/// `..` handling depends on what precedes it:
///
/// - after a normal component, that component is removed (`a/b/..` → `a`);
/// - at a root or prefix, it is dropped (`/..` → `/`);
/// - at the start of a relative path, or after another `..`, it is kept,
///   because it cannot be resolved lexically (`../x` stays `../x`,
///   `a/../../b` → `../b`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut out = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let tail = out.components().next_back();
                let can_pop = matches!(tail, Some(Component::Normal(_)));
                let at_root = matches!(tail, Some(Component::RootDir | Component::Prefix(_)));
                if can_pop {
                    out.pop();
                } else if !at_root {
                    // Nothing left to cancel against: preserve the `..` so the
                    // path keeps pointing above its starting directory.
                    out.push(Component::ParentDir);
                }
            }
            c => out.push(c),
        }
    }
    out
}
197
198/// Process a single file's already-read content: parse and resolve schema URI.
199///
200/// Returns a `Vec` because JSONL files expand to one result per non-empty line.
201#[allow(clippy::too_many_arguments)]
202fn process_one_file(
203    path: &Path,
204    content: String,
205    config: &lintel_config::Config,
206    config_dir: &Path,
207    compiled_catalogs: &[CompiledCatalog],
208) -> Vec<FileResult> {
209    let path_str = path.display().to_string();
210    let file_name = path
211        .file_name()
212        .and_then(|n| n.to_str())
213        .unwrap_or(&path_str);
214
215    let detected_format = parsers::detect_format(path);
216
217    // JSONL files get special per-line handling.
218    if detected_format == Some(FileFormat::Jsonl) {
219        return process_jsonl_file(
220            path,
221            &path_str,
222            file_name,
223            &content,
224            config,
225            config_dir,
226            compiled_catalogs,
227        );
228    }
229
230    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
231    if detected_format.is_none() {
232        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
233            || compiled_catalogs
234                .iter()
235                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
236        if !has_match {
237            return vec![FileResult::Skip];
238        }
239    }
240
241    // Parse the file content.
242    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
243        let parser = parsers::parser_for(fmt);
244        match parser.parse(&content, &path_str) {
245            Ok(val) => (parser, val),
246            Err(parse_err) => return vec![FileResult::Error(parse_err)],
247        }
248    } else {
249        match try_parse_all(&content, &path_str) {
250            Some((fmt, val)) => (parsers::parser_for(fmt), val),
251            None => return vec![FileResult::Skip],
252        }
253    };
254
255    // Skip markdown files with no frontmatter
256    if instance.is_null() {
257        return vec![FileResult::Skip];
258    }
259
260    // Schema resolution priority:
261    // 1. Inline $schema / YAML modeline (always wins)
262    // 2. Custom schema mappings from lintel.toml [schemas]
263    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
264    //
265    // Track whether the URI came from inline $schema (resolve relative to file)
266    // or from config/catalog (resolve relative to config dir).
267    let inline_uri = parser.extract_schema_uri(&content, &instance);
268    let from_inline = inline_uri.is_some();
269    let schema_uri = inline_uri
270        .or_else(|| {
271            config
272                .find_schema_mapping(&path_str, file_name)
273                .map(str::to_string)
274        })
275        .or_else(|| {
276            compiled_catalogs
277                .iter()
278                .find_map(|cat| cat.find_schema(&path_str, file_name))
279                .map(str::to_string)
280        });
281
282    let Some(schema_uri) = schema_uri else {
283        return vec![FileResult::Skip];
284    };
285
286    // Keep original URI for override matching (before rewrites)
287    let original_schema_uri = schema_uri.clone();
288
289    // Apply rewrite rules, then resolve // paths relative to lintel.toml
290    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
291    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
292
293    // Resolve relative local paths:
294    // - Inline $schema: relative to the file's parent directory
295    // - Config/catalog: relative to the config directory (where lintel.toml lives)
296    let schema_uri = resolve_local_schema_path(
297        &schema_uri,
298        if from_inline {
299            path.parent()
300        } else {
301            Some(config_dir)
302        },
303    );
304
305    vec![FileResult::Parsed {
306        schema_uri,
307        parsed: ParsedFile {
308            path: path_str,
309            content,
310            instance,
311            original_schema_uri,
312        },
313    }]
314}
315
316/// Process a JSONL file: parse each line independently and resolve schemas.
317///
318/// Each non-empty line becomes its own [`FileResult::Parsed`]. Schema resolution
319/// priority per line: inline `$schema` on the line > config mapping > catalog.
320///
321/// Also checks schema consistency across lines — mismatches are emitted as
322/// [`FileResult::Error`] so they flow through the normal Reporter pipeline.
323#[allow(clippy::too_many_arguments)]
324fn process_jsonl_file(
325    path: &Path,
326    path_str: &str,
327    file_name: &str,
328    content: &str,
329    config: &lintel_config::Config,
330    config_dir: &Path,
331    compiled_catalogs: &[CompiledCatalog],
332) -> Vec<FileResult> {
333    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
334        Ok(lines) => lines,
335        Err(parse_err) => return vec![FileResult::Error(parse_err)],
336    };
337
338    if lines.is_empty() {
339        return vec![FileResult::Skip];
340    }
341
342    let mut results = Vec::with_capacity(lines.len());
343
344    // Check schema consistency before consuming lines.
345    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
346        for m in mismatches {
347            results.push(FileResult::Error(LintelDiagnostic::SchemaMismatch {
348                path: path_str.to_string(),
349                line_number: m.line_number,
350                message: format!("expected consistent $schema but found {}", m.schema_uri),
351            }));
352        }
353    }
354
355    for line in lines {
356        // Schema resolution: inline $schema on line > config > catalog
357        // Track source to resolve relative paths correctly.
358        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
359        let from_inline = inline_uri.is_some();
360        let schema_uri = inline_uri
361            .or_else(|| {
362                config
363                    .find_schema_mapping(path_str, file_name)
364                    .map(str::to_string)
365            })
366            .or_else(|| {
367                compiled_catalogs
368                    .iter()
369                    .find_map(|cat| cat.find_schema(path_str, file_name))
370                    .map(str::to_string)
371            });
372
373        let Some(schema_uri) = schema_uri else {
374            continue;
375        };
376
377        let original_schema_uri = schema_uri.clone();
378
379        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
380        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
381
382        // Inline $schema: relative to file's parent. Config/catalog: relative to config dir.
383        let schema_uri = resolve_local_schema_path(
384            &schema_uri,
385            if from_inline {
386                path.parent()
387            } else {
388                Some(config_dir)
389            },
390        );
391
392        let line_path = format!("{path_str}:{}", line.line_number);
393
394        results.push(FileResult::Parsed {
395            schema_uri,
396            parsed: ParsedFile {
397                path: line_path,
398                content: line.raw,
399                instance: line.value,
400                original_schema_uri,
401            },
402        });
403    }
404
405    if results.is_empty() {
406        vec![FileResult::Skip]
407    } else {
408        results
409    }
410}
411
412/// Read files concurrently with tokio, using a semaphore to avoid exhausting
413/// file descriptors. I/O errors are pushed as `LintelDiagnostic::Io`.
414///
415/// # Panics
416///
417/// Panics if the internal semaphore is unexpectedly closed (should not happen).
418#[tracing::instrument(skip_all, fields(file_count = files.len()))]
419pub async fn read_files(
420    files: &[PathBuf],
421    errors: &mut Vec<LintelDiagnostic>,
422) -> Vec<(PathBuf, String)> {
423    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
424    let mut read_set = tokio::task::JoinSet::new();
425    for path in files {
426        let path = path.clone();
427        let sem = semaphore.clone();
428        read_set.spawn(async move {
429            let _permit = sem.acquire().await.expect("semaphore closed");
430            let result = tokio::fs::read_to_string(&path).await;
431            (path, result)
432        });
433    }
434
435    let mut file_contents = Vec::with_capacity(files.len());
436    while let Some(result) = read_set.join_next().await {
437        match result {
438            Ok((path, Ok(content))) => file_contents.push((path, content)),
439            Ok((path, Err(e))) => {
440                errors.push(LintelDiagnostic::Io {
441                    path: path.display().to_string(),
442                    message: format!("failed to read: {e}"),
443                });
444            }
445            Err(e) => tracing::warn!("file read task panicked: {e}"),
446        }
447    }
448
449    file_contents
450}
451
452/// Parse pre-read file contents, extract schema URIs, apply rewrites, and
453/// group by resolved schema URI.
454#[tracing::instrument(skip_all, fields(file_count = file_contents.len()))]
455#[allow(clippy::too_many_arguments)]
456fn parse_and_group_contents(
457    file_contents: Vec<(PathBuf, String)>,
458    config: &lintel_config::Config,
459    config_dir: &Path,
460    compiled_catalogs: &[CompiledCatalog],
461    errors: &mut Vec<LintelDiagnostic>,
462) -> BTreeMap<String, Vec<ParsedFile>> {
463    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
464    for (path, content) in file_contents {
465        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
466        for result in results {
467            match result {
468                FileResult::Parsed { schema_uri, parsed } => {
469                    schema_groups.entry(schema_uri).or_default().push(parsed);
470                }
471                FileResult::Error(e) => errors.push(e),
472                FileResult::Skip => {}
473            }
474        }
475    }
476
477    schema_groups
478}
479
480// ---------------------------------------------------------------------------
481// Phase 2: Schema fetching, compilation, and instance validation
482// ---------------------------------------------------------------------------
483
484/// Fetch a schema by URI, returning its parsed JSON and cache status.
485///
486/// For remote URIs, checks the prefetched map first; for local URIs, reads
487/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
488#[allow(clippy::too_many_arguments)]
489async fn fetch_schema_from_prefetched(
490    schema_uri: &str,
491    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
492    local_cache: &mut HashMap<String, Value>,
493    group: &[ParsedFile],
494    errors: &mut Vec<LintelDiagnostic>,
495    checked: &mut Vec<CheckedFile>,
496    on_check: &mut impl FnMut(&CheckedFile),
497) -> Option<(Value, Option<CacheStatus>)> {
498    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
499
500    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
501        match prefetched.get(schema_uri) {
502            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
503            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
504            None => Err(format!("schema not prefetched: {schema_uri}")),
505        }
506    } else if let Some(cached) = local_cache.get(schema_uri) {
507        Ok((cached.clone(), None))
508    } else {
509        tokio::fs::read_to_string(schema_uri)
510            .await
511            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
512            .and_then(|content| {
513                serde_json::from_str::<Value>(&content)
514                    .map(|v| {
515                        local_cache.insert(schema_uri.to_string(), v.clone());
516                        (v, None)
517                    })
518                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
519            })
520    };
521
522    match result {
523        Ok(value) => Some(value),
524        Err(message) => {
525            report_group_error(
526                |path| LintelDiagnostic::SchemaFetch {
527                    path: path.to_string(),
528                    message: message.clone(),
529                },
530                schema_uri,
531                None,
532                group,
533                errors,
534                checked,
535                on_check,
536            );
537            None
538        }
539    }
540}
541
542/// Report the same error for every file in a schema group.
543#[allow(clippy::too_many_arguments)]
544fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
545    make_error: impl Fn(&str) -> LintelDiagnostic,
546    schema_uri: &str,
547    cache_status: Option<CacheStatus>,
548    group: &[P],
549    errors: &mut Vec<LintelDiagnostic>,
550    checked: &mut Vec<CheckedFile>,
551    on_check: &mut impl FnMut(&CheckedFile),
552) {
553    for item in group {
554        let pf = item.borrow();
555        let cf = CheckedFile {
556            path: pf.path.clone(),
557            schema: schema_uri.to_string(),
558            cache_status,
559            validation_cache_status: None,
560        };
561        on_check(&cf);
562        checked.push(cf);
563        errors.push(make_error(&pf.path));
564    }
565}
566
567/// Mark every file in a group as checked (no errors).
568#[allow(clippy::too_many_arguments)]
569fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
570    schema_uri: &str,
571    cache_status: Option<CacheStatus>,
572    validation_cache_status: Option<ValidationCacheStatus>,
573    group: &[P],
574    checked: &mut Vec<CheckedFile>,
575    on_check: &mut impl FnMut(&CheckedFile),
576) {
577    for item in group {
578        let pf = item.borrow();
579        let cf = CheckedFile {
580            path: pf.path.clone(),
581            schema: schema_uri.to_string(),
582            cache_status,
583            validation_cache_status,
584        };
585        on_check(&cf);
586        checked.push(cf);
587    }
588}
589
590/// Convert [`ValidationError`]s into [`LintelDiagnostic::Validation`] diagnostics.
591#[allow(clippy::too_many_arguments)]
592fn push_validation_errors(
593    pf: &ParsedFile,
594    schema_url: &str,
595    validation_errors: &[ValidationError],
596    errors: &mut Vec<LintelDiagnostic>,
597    schema: Option<&Value>,
598) {
599    for ve in validation_errors {
600        let instance_path = if ve.instance_path.is_empty() {
601            DEFAULT_LABEL.to_string()
602        } else {
603            ve.instance_path.clone()
604        };
605        let label = format_label(&instance_path, &ve.schema_path);
606        let source_span: miette::SourceSpan = ve.span.into();
607        let mut message = ve.kind.message();
608        if let ValidationErrorKind::AdditionalProperty { ref property } = ve.kind
609            && let Some(s) = schema
610            && let Some(suggestion) = suggest::suggest_property(property, &ve.schema_path, s)
611        {
612            message = format!("{message}; did you mean '{suggestion}'?");
613        }
614        errors.push(LintelDiagnostic::Validation(ValidationDiagnostic {
615            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
616            span: source_span,
617            schema_span: source_span,
618            path: pf.path.clone(),
619            instance_path,
620            label,
621            message,
622            schema_url: schema_url.to_string(),
623            schema_path: ve.schema_path.clone(),
624            validation_code: format!("validation({})", ve.kind.as_ref()),
625        }));
626    }
627}
628
629/// Map a `jsonschema::error::ValidationErrorKind` to our serializable
630/// [`ValidationErrorKind`]. `AdditionalProperties` is handled separately
631/// in [`convert_error`].
632fn convert_kind(kind: &jsonschema::error::ValidationErrorKind) -> ValidationErrorKind {
633    use jsonschema::error::{TypeKind, ValidationErrorKind as JK};
634
635    match kind {
636        JK::AdditionalItems { limit } => ValidationErrorKind::AdditionalItems { limit: *limit },
637        JK::AdditionalProperties { .. } => unreachable!("handled in convert_error"),
638        JK::AnyOf { .. } => ValidationErrorKind::AnyOf,
639        JK::BacktrackLimitExceeded { error } => ValidationErrorKind::BacktrackLimitExceeded {
640            message: error.to_string(),
641        },
642        JK::Constant { expected_value } => ValidationErrorKind::Constant {
643            expected_value: expected_value.clone(),
644        },
645        JK::Contains => ValidationErrorKind::Contains,
646        JK::ContentEncoding { content_encoding } => ValidationErrorKind::ContentEncoding {
647            content_encoding: content_encoding.clone(),
648        },
649        JK::ContentMediaType { content_media_type } => ValidationErrorKind::ContentMediaType {
650            content_media_type: content_media_type.clone(),
651        },
652        JK::Custom { keyword, message } => ValidationErrorKind::Custom {
653            keyword: keyword.clone(),
654            message: message.clone(),
655        },
656        JK::Enum { options } => ValidationErrorKind::Enum {
657            options: options.clone(),
658        },
659        JK::ExclusiveMaximum { limit } => ValidationErrorKind::ExclusiveMaximum {
660            limit: limit.clone(),
661        },
662        JK::ExclusiveMinimum { limit } => ValidationErrorKind::ExclusiveMinimum {
663            limit: limit.clone(),
664        },
665        JK::FalseSchema => ValidationErrorKind::FalseSchema,
666        JK::Format { format } => ValidationErrorKind::Format {
667            format: format.clone(),
668        },
669        JK::FromUtf8 { error } => ValidationErrorKind::FromUtf8 {
670            message: error.to_string(),
671        },
672        JK::MaxItems { limit } => ValidationErrorKind::MaxItems { limit: *limit },
673        JK::Maximum { limit } => ValidationErrorKind::Maximum {
674            limit: limit.clone(),
675        },
676        JK::MaxLength { limit } => ValidationErrorKind::MaxLength { limit: *limit },
677        JK::MaxProperties { limit } => ValidationErrorKind::MaxProperties { limit: *limit },
678        JK::MinItems { limit } => ValidationErrorKind::MinItems { limit: *limit },
679        JK::Minimum { limit } => ValidationErrorKind::Minimum {
680            limit: limit.clone(),
681        },
682        JK::MinLength { limit } => ValidationErrorKind::MinLength { limit: *limit },
683        JK::MinProperties { limit } => ValidationErrorKind::MinProperties { limit: *limit },
684        JK::MultipleOf { multiple_of } => ValidationErrorKind::MultipleOf {
685            multiple_of: *multiple_of,
686        },
687        JK::Not { .. } => ValidationErrorKind::Not,
688        JK::OneOfMultipleValid { .. } => ValidationErrorKind::OneOfMultipleValid,
689        JK::OneOfNotValid { .. } => ValidationErrorKind::OneOfNotValid,
690        JK::Pattern { pattern } => ValidationErrorKind::Pattern {
691            pattern: pattern.clone(),
692        },
693        JK::PropertyNames { error } => ValidationErrorKind::PropertyNames {
694            message: error.to_string(),
695        },
696        JK::Required { property } => ValidationErrorKind::Required {
697            property: match property {
698                Value::String(s) => format!("\"{s}\""),
699                other => other.to_string(),
700            },
701        },
702        JK::Type { kind } => {
703            let expected = match kind {
704                TypeKind::Single(t) => t.to_string(),
705                TypeKind::Multiple(ts) => {
706                    let parts: Vec<String> = ts.iter().map(|t| t.to_string()).collect();
707                    parts.join(", ")
708                }
709            };
710            ValidationErrorKind::Type { expected }
711        }
712        JK::UnevaluatedItems { unexpected } => ValidationErrorKind::UnevaluatedItems {
713            unexpected: unexpected.clone(),
714        },
715        JK::UnevaluatedProperties { unexpected } => ValidationErrorKind::UnevaluatedProperties {
716            unexpected: unexpected.clone(),
717        },
718        JK::UniqueItems => ValidationErrorKind::UniqueItems,
719        JK::Referencing(err) => ValidationErrorKind::Referencing {
720            message: err.to_string(),
721        },
722    }
723}
724
725/// Convert a single `jsonschema::ValidationError` into one or more typed
726/// [`ValidationError`]s with pre-computed spans.
727///
728/// `AdditionalProperties` errors are split into one per unexpected property.
729fn convert_error(error: &jsonschema::ValidationError<'_>, content: &str) -> Vec<ValidationError> {
730    use jsonschema::error::ValidationErrorKind as JK;
731
732    let schema_path = error.schema_path().to_string();
733    let base_instance_path = error.instance_path().to_string();
734
735    if let JK::AdditionalProperties { unexpected } = error.kind() {
736        return unexpected
737            .iter()
738            .map(|prop| {
739                let instance_path = format!("{base_instance_path}/{prop}");
740                let span = find_instance_path_span(content, &instance_path);
741                ValidationError {
742                    instance_path,
743                    schema_path: schema_path.clone(),
744                    kind: ValidationErrorKind::AdditionalProperty {
745                        property: prop.clone(),
746                    },
747                    span,
748                }
749            })
750            .collect();
751    }
752
753    let span = find_instance_path_span(content, &base_instance_path);
754    vec![ValidationError {
755        instance_path: base_instance_path,
756        schema_path,
757        kind: convert_kind(error.kind()),
758        span,
759    }]
760}
761
762/// Validate all files in a group against an already-compiled validator and store
763/// results in the validation cache.
764#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
765#[allow(clippy::too_many_arguments)]
766async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
767    validator: &jsonschema::Validator,
768    schema_uri: &str,
769    schema_hash: &str,
770    validate_formats: bool,
771    cache_status: Option<CacheStatus>,
772    group: &[P],
773    schema_value: &Value,
774    vcache: &lintel_validation_cache::ValidationCache,
775    errors: &mut Vec<LintelDiagnostic>,
776    checked: &mut Vec<CheckedFile>,
777    on_check: &mut impl FnMut(&CheckedFile),
778) {
779    for item in group {
780        let pf = item.borrow();
781        let file_errors: Vec<ValidationError> = validator
782            .iter_errors(&pf.instance)
783            .flat_map(|error| convert_error(&error, &pf.content))
784            .collect();
785
786        vcache
787            .store(
788                &lintel_validation_cache::CacheKey {
789                    file_content: &pf.content,
790                    schema_hash,
791                    validate_formats,
792                },
793                &file_errors,
794            )
795            .await;
796        push_validation_errors(pf, schema_uri, &file_errors, errors, Some(schema_value));
797
798        let cf = CheckedFile {
799            path: pf.path.clone(),
800            schema: schema_uri.to_string(),
801            cache_status,
802            validation_cache_status: Some(ValidationCacheStatus::Miss),
803        };
804        on_check(&cf);
805        checked.push(cf);
806    }
807}
808
809// ---------------------------------------------------------------------------
810// Public API
811// ---------------------------------------------------------------------------
812
813/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
814///
815/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
816pub async fn fetch_compiled_catalogs(
817    retriever: &SchemaCache,
818    config: &lintel_config::Config,
819    no_catalog: bool,
820) -> Vec<CompiledCatalog> {
821    let mut compiled_catalogs = Vec::new();
822
823    if !no_catalog {
824        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
825
826        // Catalogs are fetched concurrently but sorted by priority so that
827        // the Lintel catalog wins over custom registries, which win over
828        // SchemaStore.  The `order` field encodes this precedence.
829        #[allow(clippy::items_after_statements)]
830        type CatalogResult = (
831            usize, // priority (lower = higher precedence)
832            String,
833            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
834        );
835        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
836
837        // Custom registries from lintel.toml (highest precedence among catalogs)
838        for (i, registry_url) in config.registries.iter().enumerate() {
839            let r = retriever.clone();
840            let url = registry_url.clone();
841            let label = format!("registry {url}");
842            catalog_tasks.spawn(async move {
843                let result = registry::fetch(&r, &url)
844                    .await
845                    .map(|cat| CompiledCatalog::compile(&cat));
846                (i, label, result)
847            });
848        }
849
850        // Lintel catalog
851        let lintel_order = config.registries.len();
852        if !config.no_default_catalog {
853            let r = retriever.clone();
854            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
855            catalog_tasks.spawn(async move {
856                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
857                    .await
858                    .map(|cat| CompiledCatalog::compile(&cat));
859                (lintel_order, label, result)
860            });
861        }
862
863        // SchemaStore catalog (lowest precedence)
864        let schemastore_order = config.registries.len() + 1;
865        let r = retriever.clone();
866        catalog_tasks.spawn(async move {
867            let result = catalog::fetch_catalog(&r)
868                .await
869                .map(|cat| CompiledCatalog::compile(&cat));
870            (schemastore_order, "SchemaStore catalog".to_string(), result)
871        });
872
873        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
874        while let Some(result) = catalog_tasks.join_next().await {
875            match result {
876                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
877                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
878                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
879            }
880        }
881        results.sort_by_key(|(order, _)| *order);
882        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));
883
884        drop(catalog_span);
885    }
886
887    compiled_catalogs
888}
889
890/// # Errors
891///
892/// Returns an error if file collection or schema validation encounters an I/O error.
893pub async fn run(args: &ValidateArgs) -> Result<CheckResult> {
894    run_with(args, None, |_| {}).await
895}
896
897/// Like [`run`], but calls `on_check` each time a file is checked, allowing
898/// callers to stream progress (e.g. verbose output) as files are processed.
899///
900/// # Errors
901///
902/// Returns an error if file collection or schema validation encounters an I/O error.
903pub async fn run_with(
904    args: &ValidateArgs,
905    cache: Option<SchemaCache>,
906    on_check: impl FnMut(&CheckedFile),
907) -> Result<CheckResult> {
908    let files = collect_files(&args.globs, &args.exclude)?;
909    run_with_files(args, cache, files, on_check).await
910}
911
912/// Like [`run_with`] but operates on a pre-discovered file list.
913///
914/// Use this when files have already been collected (e.g. by `lintel check`
915/// doing a single discovery pass shared between validate and format).
916///
917/// # Errors
918///
919/// Returns an error if schema validation encounters an I/O error.
920#[tracing::instrument(skip_all, name = "validate")]
921#[allow(clippy::too_many_lines)]
922pub async fn run_with_files(
923    args: &ValidateArgs,
924    cache: Option<SchemaCache>,
925    files: Vec<PathBuf>,
926    mut on_check: impl FnMut(&CheckedFile),
927) -> Result<CheckResult> {
928    let retriever = build_retriever(args, cache);
929    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
930    tracing::info!(file_count = files.len(), "collected files");
931
932    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
933
934    let mut errors: Vec<LintelDiagnostic> = Vec::new();
935    let file_contents = read_files(&files, &mut errors).await;
936
937    run_with_contents_inner(
938        file_contents,
939        args,
940        retriever,
941        config,
942        &config_dir,
943        compiled_catalogs,
944        errors,
945        &mut on_check,
946    )
947    .await
948}
949
950/// Like [`run_with`], but accepts pre-read file contents instead of reading
951/// from disk. Use this when the caller has already read files (e.g. to share
952/// reads between format checking and validation).
953///
954/// # Errors
955///
956/// Returns an error if schema validation encounters an I/O or network error.
957pub async fn run_with_contents(
958    args: &ValidateArgs,
959    file_contents: Vec<(PathBuf, String)>,
960    cache: Option<SchemaCache>,
961    mut on_check: impl FnMut(&CheckedFile),
962) -> Result<CheckResult> {
963    let retriever = build_retriever(args, cache);
964    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
965    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
966    let errors: Vec<LintelDiagnostic> = Vec::new();
967
968    run_with_contents_inner(
969        file_contents,
970        args,
971        retriever,
972        config,
973        &config_dir,
974        compiled_catalogs,
975        errors,
976        &mut on_check,
977    )
978    .await
979}
980
981fn build_retriever(args: &ValidateArgs, cache: Option<SchemaCache>) -> SchemaCache {
982    if let Some(c) = cache {
983        return c;
984    }
985    let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
986    if let Some(dir) = &args.cache_dir {
987        let path = PathBuf::from(dir);
988        let _ = fs::create_dir_all(&path);
989        builder = builder.cache_dir(path);
990    }
991    if let Some(ttl) = args.schema_cache_ttl {
992        builder = builder.ttl(ttl);
993    }
994    builder.build()
995}
996
997#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
998async fn run_with_contents_inner(
999    file_contents: Vec<(PathBuf, String)>,
1000    args: &ValidateArgs,
1001    retriever: SchemaCache,
1002    config: lintel_config::Config,
1003    config_dir: &Path,
1004    compiled_catalogs: Vec<CompiledCatalog>,
1005    mut errors: Vec<LintelDiagnostic>,
1006    on_check: &mut impl FnMut(&CheckedFile),
1007) -> Result<CheckResult> {
1008    let mut checked: Vec<CheckedFile> = Vec::new();
1009
1010    // Phase 1: Parse files and resolve schema URIs
1011    let schema_groups = parse_and_group_contents(
1012        file_contents,
1013        &config,
1014        config_dir,
1015        &compiled_catalogs,
1016        &mut errors,
1017    );
1018    tracing::info!(
1019        schema_count = schema_groups.len(),
1020        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
1021        "grouped files by schema"
1022    );
1023
1024    // Create validation cache
1025    let vcache = lintel_validation_cache::ValidationCache::new(
1026        lintel_validation_cache::ensure_cache_dir(),
1027        args.force_validation,
1028    );
1029
1030    // Prefetch all remote schemas in parallel
1031    let remote_uris: Vec<&String> = schema_groups
1032        .keys()
1033        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
1034        .collect();
1035
1036    let prefetched = {
1037        let _prefetch_span =
1038            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
1039
1040        let mut schema_tasks = tokio::task::JoinSet::new();
1041        for uri in remote_uris {
1042            let r = retriever.clone();
1043            let u = uri.clone();
1044            schema_tasks.spawn(async move {
1045                let result = r.fetch(&u).await;
1046                (u, result)
1047            });
1048        }
1049
1050        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
1051        while let Some(result) = schema_tasks.join_next().await {
1052            match result {
1053                Ok((uri, fetch_result)) => {
1054                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
1055                }
1056                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
1057            }
1058        }
1059
1060        prefetched
1061    };
1062
1063    // Phase 2: Compile each schema once and validate all matching files
1064    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
1065    let mut fetch_time = core::time::Duration::ZERO;
1066    let mut hash_time = core::time::Duration::ZERO;
1067    let mut vcache_time = core::time::Duration::ZERO;
1068    let mut compile_time = core::time::Duration::ZERO;
1069    let mut validate_time = core::time::Duration::ZERO;
1070
1071    for (schema_uri, group) in &schema_groups {
1072        let _group_span = tracing::debug_span!(
1073            "schema_group",
1074            schema = schema_uri.as_str(),
1075            files = group.len(),
1076        )
1077        .entered();
1078
1079        // If ANY file in the group matches a `validate_formats = false` override,
1080        // disable format validation for the whole group (they share one compiled validator).
1081        let validate_formats = group.iter().all(|pf| {
1082            config
1083                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
1084        });
1085
1086        // Remote schemas were prefetched in parallel above; local schemas are
1087        // read from disk here (with in-memory caching).
1088        let t = std::time::Instant::now();
1089        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
1090            schema_uri,
1091            &prefetched,
1092            &mut local_schema_cache,
1093            group,
1094            &mut errors,
1095            &mut checked,
1096            on_check,
1097        )
1098        .await
1099        else {
1100            fetch_time += t.elapsed();
1101            continue;
1102        };
1103        fetch_time += t.elapsed();
1104
1105        // Pre-compute schema hash once for the entire group.
1106        let t = std::time::Instant::now();
1107        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
1108        hash_time += t.elapsed();
1109
1110        // Split the group into validation cache hits and misses.
1111        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
1112
1113        let t = std::time::Instant::now();
1114        for pf in group {
1115            let (cached, vcache_status) = vcache
1116                .lookup(&lintel_validation_cache::CacheKey {
1117                    file_content: &pf.content,
1118                    schema_hash: &schema_hash,
1119                    validate_formats,
1120                })
1121                .await;
1122
1123            if let Some(cached_errors) = cached {
1124                push_validation_errors(
1125                    pf,
1126                    schema_uri,
1127                    &cached_errors,
1128                    &mut errors,
1129                    Some(&schema_value),
1130                );
1131                let cf = CheckedFile {
1132                    path: pf.path.clone(),
1133                    schema: schema_uri.clone(),
1134                    cache_status,
1135                    validation_cache_status: Some(vcache_status),
1136                };
1137                on_check(&cf);
1138                checked.push(cf);
1139            } else {
1140                cache_misses.push(pf);
1141            }
1142        }
1143        vcache_time += t.elapsed();
1144
1145        tracing::debug!(
1146            cache_hits = group.len() - cache_misses.len(),
1147            cache_misses = cache_misses.len(),
1148            "validation cache"
1149        );
1150
1151        // If all files hit the validation cache, skip schema compilation entirely.
1152        if cache_misses.is_empty() {
1153            continue;
1154        }
1155
1156        // Compile the schema for cache misses.
1157        let t = std::time::Instant::now();
1158        let validator = {
1159            // Set base URI so relative $ref values (e.g. "./rule.json") resolve
1160            // correctly. Remote schemas use the HTTP URI directly; local schemas
1161            // get a file:// URI derived from the canonical absolute path.
1162            let is_remote_schema =
1163                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
1164            let local_retriever = LocalRetriever {
1165                http: retriever.clone(),
1166            };
1167            let opts = jsonschema::async_options()
1168                .with_retriever(local_retriever)
1169                .should_validate_formats(validate_formats);
1170            let base_uri = if is_remote_schema {
1171                // Strip fragment (e.g. "#") — base URIs must not contain fragments.
1172                let uri = match schema_uri.find('#') {
1173                    Some(pos) => schema_uri[..pos].to_string(),
1174                    None => schema_uri.clone(),
1175                };
1176                Some(uri)
1177            } else {
1178                std::fs::canonicalize(schema_uri)
1179                    .ok()
1180                    .map(|p| format!("file://{}", p.display()))
1181            };
1182            let opts = if let Some(uri) = base_uri {
1183                opts.with_base_uri(uri)
1184            } else {
1185                opts
1186            };
1187            match opts.build(&schema_value).await {
1188                Ok(v) => v,
1189                Err(e) => {
1190                    compile_time += t.elapsed();
1191                    // When format validation is disabled and the compilation error
1192                    // is a uri-reference issue (e.g. Rust-style $ref paths in
1193                    // vector.json), skip validation silently.
1194                    if !validate_formats && e.to_string().contains("uri-reference") {
1195                        mark_group_checked(
1196                            schema_uri,
1197                            cache_status,
1198                            Some(ValidationCacheStatus::Miss),
1199                            &cache_misses,
1200                            &mut checked,
1201                            on_check,
1202                        );
1203                        continue;
1204                    }
1205                    let msg = format!("failed to compile schema: {e}");
1206                    report_group_error(
1207                        |path| LintelDiagnostic::SchemaCompile {
1208                            path: path.to_string(),
1209                            message: msg.clone(),
1210                        },
1211                        schema_uri,
1212                        cache_status,
1213                        &cache_misses,
1214                        &mut errors,
1215                        &mut checked,
1216                        on_check,
1217                    );
1218                    continue;
1219                }
1220            }
1221        };
1222        compile_time += t.elapsed();
1223
1224        let t = std::time::Instant::now();
1225        validate_group(
1226            &validator,
1227            schema_uri,
1228            &schema_hash,
1229            validate_formats,
1230            cache_status,
1231            &cache_misses,
1232            &schema_value,
1233            &vcache,
1234            &mut errors,
1235            &mut checked,
1236            on_check,
1237        )
1238        .await;
1239        validate_time += t.elapsed();
1240    }
1241
1242    #[allow(clippy::cast_possible_truncation)]
1243    {
1244        tracing::info!(
1245            fetch_ms = fetch_time.as_millis() as u64,
1246            hash_ms = hash_time.as_millis() as u64,
1247            vcache_ms = vcache_time.as_millis() as u64,
1248            compile_ms = compile_time.as_millis() as u64,
1249            validate_ms = validate_time.as_millis() as u64,
1250            "phase2 breakdown"
1251        );
1252    }
1253
1254    // Sort errors for deterministic output (by path, then by span offset)
1255    errors.sort_by(|a, b| {
1256        a.path()
1257            .cmp(b.path())
1258            .then_with(|| a.offset().cmp(&b.offset()))
1259    });
1260
1261    Ok(CheckResult { errors, checked })
1262}
1263
1264#[cfg(test)]
1265mod tests {
1266    use super::*;
1267    use lintel_schema_cache::SchemaCache;
1268    use std::path::Path;
1269
1270    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1271        let cache = SchemaCache::memory();
1272        for (uri, body) in entries {
1273            cache.insert(
1274                uri,
1275                serde_json::from_str(body).expect("test mock: invalid JSON"),
1276            );
1277        }
1278        cache
1279    }
1280
1281    fn testdata() -> PathBuf {
1282        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1283    }
1284
1285    /// Build glob patterns that scan one or more testdata directories for all supported file types.
1286    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1287        dirs.iter()
1288            .flat_map(|dir| {
1289                let base = testdata().join(dir);
1290                vec![
1291                    base.join("*.json").to_string_lossy().to_string(),
1292                    base.join("*.yaml").to_string_lossy().to_string(),
1293                    base.join("*.yml").to_string_lossy().to_string(),
1294                    base.join("*.json5").to_string_lossy().to_string(),
1295                    base.join("*.jsonc").to_string_lossy().to_string(),
1296                    base.join("*.toml").to_string_lossy().to_string(),
1297                ]
1298            })
1299            .collect()
1300    }
1301
1302    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1303        ValidateArgs {
1304            globs: scenario_globs(dirs),
1305            exclude: vec![],
1306            cache_dir: None,
1307            force_schema_fetch: true,
1308            force_validation: true,
1309            no_catalog: true,
1310            config_dir: None,
1311            schema_cache_ttl: None,
1312        }
1313    }
1314
/// Minimal test schema: an object with a required string property `name`.
const SCHEMA: &str = concat!(
    r#"{"type":"object","#,
    r#""properties":{"name":{"type":"string"}},"#,
    r#""required":["name"]}"#,
);
1317
1318    fn schema_mock() -> SchemaCache {
1319        mock(&[("https://example.com/schema.json", SCHEMA)])
1320    }
1321
1322    // --- Directory scanning tests ---
1323
1324    #[tokio::test]
1325    async fn no_matching_files() -> anyhow::Result<()> {
1326        let tmp = tempfile::tempdir()?;
1327        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1328        let c = ValidateArgs {
1329            globs: vec![pattern],
1330            exclude: vec![],
1331            cache_dir: None,
1332            force_schema_fetch: true,
1333            force_validation: true,
1334            no_catalog: true,
1335            config_dir: None,
1336            schema_cache_ttl: None,
1337        };
1338        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1339        assert!(!result.has_errors());
1340        Ok(())
1341    }
1342
1343    #[tokio::test]
1344    async fn dir_all_valid() -> anyhow::Result<()> {
1345        let c = args_for_dirs(&["positive_tests"]);
1346        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1347        assert!(!result.has_errors());
1348        Ok(())
1349    }
1350
1351    #[tokio::test]
1352    async fn dir_all_invalid() -> anyhow::Result<()> {
1353        let c = args_for_dirs(&["negative_tests"]);
1354        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1355        assert!(result.has_errors());
1356        Ok(())
1357    }
1358
1359    #[tokio::test]
1360    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1361        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1362        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1363        assert!(result.has_errors());
1364        Ok(())
1365    }
1366
1367    #[tokio::test]
1368    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1369        let c = args_for_dirs(&["no_schema"]);
1370        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1371        assert!(!result.has_errors());
1372        Ok(())
1373    }
1374
1375    #[tokio::test]
1376    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1377        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1378        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1379        assert!(!result.has_errors());
1380        Ok(())
1381    }
1382
1383    // --- Directory as positional arg ---
1384
1385    #[tokio::test]
1386    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1387        let dir = testdata().join("positive_tests");
1388        let c = ValidateArgs {
1389            globs: vec![dir.to_string_lossy().to_string()],
1390            exclude: vec![],
1391            cache_dir: None,
1392            force_schema_fetch: true,
1393            force_validation: true,
1394            no_catalog: true,
1395            config_dir: None,
1396            schema_cache_ttl: None,
1397        };
1398        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1399        assert!(!result.has_errors());
1400        assert!(result.files_checked() > 0);
1401        Ok(())
1402    }
1403
1404    #[tokio::test]
1405    async fn multiple_directory_args() -> anyhow::Result<()> {
1406        let pos_dir = testdata().join("positive_tests");
1407        let no_schema_dir = testdata().join("no_schema");
1408        let c = ValidateArgs {
1409            globs: vec![
1410                pos_dir.to_string_lossy().to_string(),
1411                no_schema_dir.to_string_lossy().to_string(),
1412            ],
1413            exclude: vec![],
1414            cache_dir: None,
1415            force_schema_fetch: true,
1416            force_validation: true,
1417            no_catalog: true,
1418            config_dir: None,
1419            schema_cache_ttl: None,
1420        };
1421        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1422        assert!(!result.has_errors());
1423        Ok(())
1424    }
1425
1426    #[tokio::test]
1427    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1428        let dir = testdata().join("positive_tests");
1429        let glob_pattern = testdata()
1430            .join("no_schema")
1431            .join("*.json")
1432            .to_string_lossy()
1433            .to_string();
1434        let c = ValidateArgs {
1435            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1436            exclude: vec![],
1437            cache_dir: None,
1438            force_schema_fetch: true,
1439            force_validation: true,
1440            no_catalog: true,
1441            config_dir: None,
1442            schema_cache_ttl: None,
1443        };
1444        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1445        assert!(!result.has_errors());
1446        Ok(())
1447    }
1448
1449    #[tokio::test]
1450    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1451        let base = testdata().join("malformed");
1452        let c = ValidateArgs {
1453            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1454            exclude: vec![],
1455            cache_dir: None,
1456            force_schema_fetch: true,
1457            force_validation: true,
1458            no_catalog: true,
1459            config_dir: None,
1460            schema_cache_ttl: None,
1461        };
1462        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1463        assert!(result.has_errors());
1464        Ok(())
1465    }
1466
1467    #[tokio::test]
1468    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1469        let base = testdata().join("malformed");
1470        let c = ValidateArgs {
1471            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1472            exclude: vec![],
1473            cache_dir: None,
1474            force_schema_fetch: true,
1475            force_validation: true,
1476            no_catalog: true,
1477            config_dir: None,
1478            schema_cache_ttl: None,
1479        };
1480        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1481        assert!(result.has_errors());
1482        Ok(())
1483    }
1484
1485    // --- Exclude filter ---
1486
1487    #[tokio::test]
1488    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1489        let base = testdata().join("negative_tests");
1490        let c = ValidateArgs {
1491            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1492            exclude: vec![
1493                base.join("missing_name.json").to_string_lossy().to_string(),
1494                base.join("missing_name.toml").to_string_lossy().to_string(),
1495                base.join("missing_name.yaml").to_string_lossy().to_string(),
1496            ],
1497            cache_dir: None,
1498            force_schema_fetch: true,
1499            force_validation: true,
1500            no_catalog: true,
1501            config_dir: None,
1502            schema_cache_ttl: None,
1503        };
1504        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1505        assert!(!result.has_errors());
1506        Ok(())
1507    }
1508
1509    // --- Cache options ---
1510
1511    #[tokio::test]
1512    async fn custom_cache_dir() -> anyhow::Result<()> {
1513        let c = ValidateArgs {
1514            globs: scenario_globs(&["positive_tests"]),
1515            exclude: vec![],
1516            cache_dir: None,
1517            force_schema_fetch: true,
1518            force_validation: true,
1519            no_catalog: true,
1520            config_dir: None,
1521            schema_cache_ttl: None,
1522        };
1523        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1524        assert!(!result.has_errors());
1525        Ok(())
1526    }
1527
1528    // --- Local schema ---
1529
1530    #[tokio::test]
1531    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1532        let tmp = tempfile::tempdir()?;
1533        let schema_path = tmp.path().join("schema.json");
1534        fs::write(&schema_path, SCHEMA)?;
1535
1536        let f = tmp.path().join("valid.json");
1537        fs::write(
1538            &f,
1539            format!(
1540                r#"{{"$schema":"{}","name":"hello"}}"#,
1541                schema_path.to_string_lossy()
1542            ),
1543        )?;
1544
1545        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1546        let c = ValidateArgs {
1547            globs: vec![pattern],
1548            exclude: vec![],
1549            cache_dir: None,
1550            force_schema_fetch: true,
1551            force_validation: true,
1552            no_catalog: true,
1553            config_dir: None,
1554            schema_cache_ttl: None,
1555        };
1556        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1557        assert!(!result.has_errors());
1558        Ok(())
1559    }
1560
1561    #[tokio::test]
1562    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1563        let tmp = tempfile::tempdir()?;
1564        let schema_path = tmp.path().join("schema.json");
1565        fs::write(&schema_path, SCHEMA)?;
1566
1567        let f = tmp.path().join("valid.yaml");
1568        fs::write(
1569            &f,
1570            format!(
1571                "# yaml-language-server: $schema={}\nname: hello\n",
1572                schema_path.to_string_lossy()
1573            ),
1574        )?;
1575
1576        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1577        let c = ValidateArgs {
1578            globs: vec![pattern],
1579            exclude: vec![],
1580            cache_dir: None,
1581            force_schema_fetch: true,
1582            force_validation: true,
1583            no_catalog: true,
1584            config_dir: None,
1585            schema_cache_ttl: None,
1586        };
1587        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1588        assert!(!result.has_errors());
1589        Ok(())
1590    }
1591
1592    #[tokio::test]
1593    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1594        let tmp = tempfile::tempdir()?;
1595        let f = tmp.path().join("ref.json");
1596        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1597
1598        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1599        let c = ValidateArgs {
1600            globs: vec![pattern],
1601            exclude: vec![],
1602            cache_dir: None,
1603            force_schema_fetch: true,
1604            force_validation: true,
1605            no_catalog: true,
1606            config_dir: None,
1607            schema_cache_ttl: None,
1608        };
1609        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1610        assert!(result.has_errors());
1611        Ok(())
1612    }
1613
1614    // --- JSON5 / JSONC tests ---
1615
1616    #[tokio::test]
1617    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1618        let tmp = tempfile::tempdir()?;
1619        let schema_path = tmp.path().join("schema.json");
1620        fs::write(&schema_path, SCHEMA)?;
1621
1622        let f = tmp.path().join("config.json5");
1623        fs::write(
1624            &f,
1625            format!(
1626                r#"{{
1627  // JSON5 comment
1628  "$schema": "{}",
1629  name: "hello",
1630}}"#,
1631                schema_path.to_string_lossy()
1632            ),
1633        )?;
1634
1635        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1636        let c = ValidateArgs {
1637            globs: vec![pattern],
1638            exclude: vec![],
1639            cache_dir: None,
1640            force_schema_fetch: true,
1641            force_validation: true,
1642            no_catalog: true,
1643            config_dir: None,
1644            schema_cache_ttl: None,
1645        };
1646        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1647        assert!(!result.has_errors());
1648        Ok(())
1649    }
1650
1651    #[tokio::test]
1652    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1653        let tmp = tempfile::tempdir()?;
1654        let schema_path = tmp.path().join("schema.json");
1655        fs::write(&schema_path, SCHEMA)?;
1656
1657        let f = tmp.path().join("config.jsonc");
1658        fs::write(
1659            &f,
1660            format!(
1661                r#"{{
1662  /* JSONC comment */
1663  "$schema": "{}",
1664  "name": "hello"
1665}}"#,
1666                schema_path.to_string_lossy()
1667            ),
1668        )?;
1669
1670        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1671        let c = ValidateArgs {
1672            globs: vec![pattern],
1673            exclude: vec![],
1674            cache_dir: None,
1675            force_schema_fetch: true,
1676            force_validation: true,
1677            no_catalog: true,
1678            config_dir: None,
1679            schema_cache_ttl: None,
1680        };
1681        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1682        assert!(!result.has_errors());
1683        Ok(())
1684    }
1685
1686    // --- Catalog-based schema matching ---
1687
    /// Minimal stand-in for the GitHub Actions workflow schema used by the
    /// catalog tests: an object that requires `on` and `jobs` (`jobs` must be
    /// an object), with an optional string `name`.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1697
1698    fn gh_catalog_json() -> String {
1699        r#"{"version":1,"schemas":[{
1700            "name": "GitHub Workflow",
1701            "description": "GitHub Actions workflow",
1702            "url": "https://www.schemastore.org/github-workflow.json",
1703            "fileMatch": [
1704                "**/.github/workflows/*.yml",
1705                "**/.github/workflows/*.yaml"
1706            ]
1707        }]}"#
1708            .to_string()
1709    }
1710
1711    #[tokio::test]
1712    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1713        let tmp = tempfile::tempdir()?;
1714        let cache_tmp = tempfile::tempdir()?;
1715        let wf_dir = tmp.path().join(".github/workflows");
1716        fs::create_dir_all(&wf_dir)?;
1717        fs::write(
1718            wf_dir.join("ci.yml"),
1719            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1720        )?;
1721
1722        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1723        let client = mock(&[
1724            (
1725                "https://www.schemastore.org/api/json/catalog.json",
1726                &gh_catalog_json(),
1727            ),
1728            (
1729                "https://www.schemastore.org/github-workflow.json",
1730                GH_WORKFLOW_SCHEMA,
1731            ),
1732        ]);
1733        let c = ValidateArgs {
1734            globs: vec![pattern],
1735            exclude: vec![],
1736            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1737            force_schema_fetch: true,
1738            force_validation: true,
1739            no_catalog: false,
1740            config_dir: None,
1741            schema_cache_ttl: None,
1742        };
1743        let result = run_with(&c, Some(client), |_| {}).await?;
1744        assert!(!result.has_errors());
1745        Ok(())
1746    }
1747
1748    #[tokio::test]
1749    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1750        let tmp = tempfile::tempdir()?;
1751        let cache_tmp = tempfile::tempdir()?;
1752        let wf_dir = tmp.path().join(".github/workflows");
1753        fs::create_dir_all(&wf_dir)?;
1754        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1755
1756        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1757        let client = mock(&[
1758            (
1759                "https://www.schemastore.org/api/json/catalog.json",
1760                &gh_catalog_json(),
1761            ),
1762            (
1763                "https://www.schemastore.org/github-workflow.json",
1764                GH_WORKFLOW_SCHEMA,
1765            ),
1766        ]);
1767        let c = ValidateArgs {
1768            globs: vec![pattern],
1769            exclude: vec![],
1770            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1771            force_schema_fetch: true,
1772            force_validation: true,
1773            no_catalog: false,
1774            config_dir: None,
1775            schema_cache_ttl: None,
1776        };
1777        let result = run_with(&c, Some(client), |_| {}).await?;
1778        assert!(result.has_errors());
1779        Ok(())
1780    }
1781
1782    #[tokio::test]
1783    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1784        let tmp = tempfile::tempdir()?;
1785        let cache_tmp = tempfile::tempdir()?;
1786        let wf_dir = tmp.path().join(".github/workflows");
1787        fs::create_dir_all(&wf_dir)?;
1788        fs::write(
1789            wf_dir.join("ci.yml"),
1790            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1791        )?;
1792
1793        let client = mock(&[
1794            (
1795                "https://www.schemastore.org/api/json/catalog.json",
1796                &gh_catalog_json(),
1797            ),
1798            (
1799                "https://www.schemastore.org/github-workflow.json",
1800                GH_WORKFLOW_SCHEMA,
1801            ),
1802        ]);
1803        let c = ValidateArgs {
1804            globs: vec![],
1805            exclude: vec![],
1806            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1807            force_schema_fetch: true,
1808            force_validation: true,
1809            no_catalog: false,
1810            config_dir: None,
1811            schema_cache_ttl: None,
1812        };
1813
1814        let orig_dir = std::env::current_dir()?;
1815        std::env::set_current_dir(tmp.path())?;
1816        let result = run_with(&c, Some(client), |_| {}).await?;
1817        std::env::set_current_dir(orig_dir)?;
1818
1819        assert!(!result.has_errors());
1820        Ok(())
1821    }
1822
1823    // --- TOML tests ---
1824
1825    #[tokio::test]
1826    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1827        let tmp = tempfile::tempdir()?;
1828        let schema_path = tmp.path().join("schema.json");
1829        fs::write(&schema_path, SCHEMA)?;
1830
1831        let f = tmp.path().join("config.toml");
1832        fs::write(
1833            &f,
1834            format!(
1835                "# :schema {}\nname = \"hello\"\n",
1836                schema_path.to_string_lossy()
1837            ),
1838        )?;
1839
1840        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1841        let c = ValidateArgs {
1842            globs: vec![pattern],
1843            exclude: vec![],
1844            cache_dir: None,
1845            force_schema_fetch: true,
1846            force_validation: true,
1847            no_catalog: true,
1848            config_dir: None,
1849            schema_cache_ttl: None,
1850        };
1851        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1852        assert!(!result.has_errors());
1853        Ok(())
1854    }
1855
1856    // --- Rewrite rules + // resolution ---
1857
1858    #[tokio::test]
1859    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1860        let tmp = tempfile::tempdir()?;
1861
1862        let schemas_dir = tmp.path().join("schemas");
1863        fs::create_dir_all(&schemas_dir)?;
1864        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1865
1866        fs::write(
1867            tmp.path().join("lintel.toml"),
1868            r#"
1869[rewrite]
1870"http://localhost:9000/" = "//schemas/"
1871"#,
1872        )?;
1873
1874        let f = tmp.path().join("config.json");
1875        fs::write(
1876            &f,
1877            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1878        )?;
1879
1880        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1881        let c = ValidateArgs {
1882            globs: vec![pattern],
1883            exclude: vec![],
1884            cache_dir: None,
1885            force_schema_fetch: true,
1886            force_validation: true,
1887            no_catalog: true,
1888            config_dir: Some(tmp.path().to_path_buf()),
1889            schema_cache_ttl: None,
1890        };
1891
1892        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1893        assert!(!result.has_errors());
1894        assert_eq!(result.files_checked(), 1);
1895        Ok(())
1896    }
1897
1898    #[tokio::test]
1899    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1900        let tmp = tempfile::tempdir()?;
1901
1902        let schemas_dir = tmp.path().join("schemas");
1903        fs::create_dir_all(&schemas_dir)?;
1904        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1905
1906        fs::write(tmp.path().join("lintel.toml"), "")?;
1907
1908        let sub = tmp.path().join("deeply/nested");
1909        fs::create_dir_all(&sub)?;
1910        let f = sub.join("config.json");
1911        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1912
1913        let pattern = sub.join("*.json").to_string_lossy().to_string();
1914        let c = ValidateArgs {
1915            globs: vec![pattern],
1916            exclude: vec![],
1917            cache_dir: None,
1918            force_schema_fetch: true,
1919            force_validation: true,
1920            no_catalog: true,
1921            config_dir: Some(tmp.path().to_path_buf()),
1922            schema_cache_ttl: None,
1923        };
1924
1925        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1926        assert!(!result.has_errors());
1927        Ok(())
1928    }
1929
1930    // --- Format validation override ---
1931
    /// Schema with a single string property `link` carrying
    /// `"format": "uri-reference"`; used by the format-validation override
    /// tests below.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1938
1939    #[tokio::test]
1940    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1941        let tmp = tempfile::tempdir()?;
1942        let schema_path = tmp.path().join("schema.json");
1943        fs::write(&schema_path, FORMAT_SCHEMA)?;
1944
1945        let f = tmp.path().join("data.json");
1946        fs::write(
1947            &f,
1948            format!(
1949                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1950                schema_path.to_string_lossy()
1951            ),
1952        )?;
1953
1954        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1955        let c = ValidateArgs {
1956            globs: vec![pattern],
1957            exclude: vec![],
1958            cache_dir: None,
1959            force_schema_fetch: true,
1960            force_validation: true,
1961            no_catalog: true,
1962            config_dir: Some(tmp.path().to_path_buf()),
1963            schema_cache_ttl: None,
1964        };
1965        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1966        assert!(
1967            result.has_errors(),
1968            "expected format error without override"
1969        );
1970        Ok(())
1971    }
1972
1973    #[tokio::test]
1974    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1975        let tmp = tempfile::tempdir()?;
1976        let schema_path = tmp.path().join("schema.json");
1977        fs::write(&schema_path, FORMAT_SCHEMA)?;
1978
1979        let f = tmp.path().join("data.json");
1980        fs::write(
1981            &f,
1982            format!(
1983                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1984                schema_path.to_string_lossy()
1985            ),
1986        )?;
1987
1988        // Use **/data.json to match the absolute path from the tempdir.
1989        fs::write(
1990            tmp.path().join("lintel.toml"),
1991            r#"
1992[[override]]
1993files = ["**/data.json"]
1994validate_formats = false
1995"#,
1996        )?;
1997
1998        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1999        let c = ValidateArgs {
2000            globs: vec![pattern],
2001            exclude: vec![],
2002            cache_dir: None,
2003            force_schema_fetch: true,
2004            force_validation: true,
2005            no_catalog: true,
2006            config_dir: Some(tmp.path().to_path_buf()),
2007            schema_cache_ttl: None,
2008        };
2009        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2010        assert!(
2011            !result.has_errors(),
2012            "expected no errors with validate_formats = false override"
2013        );
2014        Ok(())
2015    }
2016
2017    // --- Unrecognized extension handling ---
2018
2019    #[tokio::test]
2020    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
2021        let tmp = tempfile::tempdir()?;
2022        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
2023
2024        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
2025        let c = ValidateArgs {
2026            globs: vec![pattern],
2027            exclude: vec![],
2028            cache_dir: None,
2029            force_schema_fetch: true,
2030            force_validation: true,
2031            no_catalog: true,
2032            config_dir: Some(tmp.path().to_path_buf()),
2033            schema_cache_ttl: None,
2034        };
2035        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2036        assert!(!result.has_errors());
2037        assert_eq!(result.files_checked(), 0);
2038        Ok(())
2039    }
2040
2041    #[tokio::test]
2042    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
2043        let tmp = tempfile::tempdir()?;
2044        let cache_tmp = tempfile::tempdir()?;
2045        // File has .cfg extension (unrecognized) but content is valid JSON
2046        fs::write(
2047            tmp.path().join("myapp.cfg"),
2048            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
2049        )?;
2050
2051        let catalog_json = r#"{"version":1,"schemas":[{
2052            "name": "MyApp Config",
2053            "description": "MyApp configuration",
2054            "url": "https://example.com/myapp.schema.json",
2055            "fileMatch": ["*.cfg"]
2056        }]}"#;
2057        let schema =
2058            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2059
2060        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2061        let client = mock(&[
2062            (
2063                "https://www.schemastore.org/api/json/catalog.json",
2064                catalog_json,
2065            ),
2066            ("https://example.com/myapp.schema.json", schema),
2067        ]);
2068        let c = ValidateArgs {
2069            globs: vec![pattern],
2070            exclude: vec![],
2071            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2072            force_schema_fetch: true,
2073            force_validation: true,
2074            no_catalog: false,
2075            config_dir: Some(tmp.path().to_path_buf()),
2076            schema_cache_ttl: None,
2077        };
2078        let result = run_with(&c, Some(client), |_| {}).await?;
2079        assert!(!result.has_errors());
2080        assert_eq!(result.files_checked(), 1);
2081        Ok(())
2082    }
2083
2084    #[tokio::test]
2085    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
2086        let tmp = tempfile::tempdir()?;
2087        let cache_tmp = tempfile::tempdir()?;
2088        // File matches catalog but content isn't parseable by any format
2089        fs::write(
2090            tmp.path().join("myapp.cfg"),
2091            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
2092        )?;
2093
2094        let catalog_json = r#"{"version":1,"schemas":[{
2095            "name": "MyApp Config",
2096            "description": "MyApp configuration",
2097            "url": "https://example.com/myapp.schema.json",
2098            "fileMatch": ["*.cfg"]
2099        }]}"#;
2100
2101        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2102        let client = mock(&[(
2103            "https://www.schemastore.org/api/json/catalog.json",
2104            catalog_json,
2105        )]);
2106        let c = ValidateArgs {
2107            globs: vec![pattern],
2108            exclude: vec![],
2109            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2110            force_schema_fetch: true,
2111            force_validation: true,
2112            no_catalog: false,
2113            config_dir: Some(tmp.path().to_path_buf()),
2114            schema_cache_ttl: None,
2115        };
2116        let result = run_with(&c, Some(client), |_| {}).await?;
2117        assert!(!result.has_errors());
2118        assert_eq!(result.files_checked(), 0);
2119        Ok(())
2120    }
2121
2122    #[tokio::test]
2123    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
2124        let tmp = tempfile::tempdir()?;
2125        let cache_tmp = tempfile::tempdir()?;
2126        // File has .cfg extension, content is valid JSON but fails schema validation
2127        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
2128
2129        let catalog_json = r#"{"version":1,"schemas":[{
2130            "name": "MyApp Config",
2131            "description": "MyApp configuration",
2132            "url": "https://example.com/myapp.schema.json",
2133            "fileMatch": ["*.cfg"]
2134        }]}"#;
2135        let schema =
2136            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2137
2138        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2139        let client = mock(&[
2140            (
2141                "https://www.schemastore.org/api/json/catalog.json",
2142                catalog_json,
2143            ),
2144            ("https://example.com/myapp.schema.json", schema),
2145        ]);
2146        let c = ValidateArgs {
2147            globs: vec![pattern],
2148            exclude: vec![],
2149            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2150            force_schema_fetch: true,
2151            force_validation: true,
2152            no_catalog: false,
2153            config_dir: Some(tmp.path().to_path_buf()),
2154            schema_cache_ttl: None,
2155        };
2156        let result = run_with(&c, Some(client), |_| {}).await?;
2157        assert!(result.has_errors());
2158        assert_eq!(result.files_checked(), 1);
2159        Ok(())
2160    }
2161
2162    // --- Validation cache ---
2163
2164    #[tokio::test]
2165    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
2166        let tmp = tempfile::tempdir()?;
2167        let schema_path = tmp.path().join("schema.json");
2168        fs::write(&schema_path, SCHEMA)?;
2169
2170        let f = tmp.path().join("valid.json");
2171        fs::write(
2172            &f,
2173            format!(
2174                r#"{{"$schema":"{}","name":"hello"}}"#,
2175                schema_path.to_string_lossy()
2176            ),
2177        )?;
2178
2179        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2180
2181        // First run: force_validation = false so results get cached
2182        let c = ValidateArgs {
2183            globs: vec![pattern.clone()],
2184            exclude: vec![],
2185            cache_dir: None,
2186            force_schema_fetch: true,
2187            force_validation: false,
2188            no_catalog: true,
2189            config_dir: None,
2190            schema_cache_ttl: None,
2191        };
2192        let mut first_statuses = Vec::new();
2193        let result = run_with(&c, Some(mock(&[])), |cf| {
2194            first_statuses.push(cf.validation_cache_status);
2195        })
2196        .await?;
2197        assert!(!result.has_errors());
2198        assert!(result.files_checked() > 0);
2199
2200        // Verify the first run recorded a validation cache miss
2201        assert!(
2202            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2203            "expected at least one validation cache miss on first run"
2204        );
2205
2206        // Second run: same file, same schema — should hit validation cache
2207        let mut second_statuses = Vec::new();
2208        let result = run_with(&c, Some(mock(&[])), |cf| {
2209            second_statuses.push(cf.validation_cache_status);
2210        })
2211        .await?;
2212        assert!(!result.has_errors());
2213
2214        // Verify the second run got a validation cache hit
2215        assert!(
2216            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2217            "expected at least one validation cache hit on second run"
2218        );
2219        Ok(())
2220    }
2221
2222    /// Schemas whose URI contains a fragment (e.g. `…/draft-07/schema#`)
2223    /// must compile without error — the fragment is stripped before being
2224    /// used as the base URI for `$ref` resolution.
2225    #[tokio::test]
2226    async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2227        let tmp = tempfile::tempdir()?;
2228
2229        // A minimal draft-07 schema whose `$schema` ends with `#`.
2230        let schema_body = r#"{
2231            "$schema": "http://json-schema.org/draft-07/schema#",
2232            "type": "object",
2233            "properties": { "name": { "type": "string" } },
2234            "required": ["name"]
2235        }"#;
2236
2237        let schema_url = "http://json-schema.org/draft-07/schema#";
2238
2239        let f = tmp.path().join("data.json");
2240        fs::write(
2241            &f,
2242            format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2243        )?;
2244
2245        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2246        let client = mock(&[(
2247            // The schema URI with fragment — exactly as the `$schema` value appears.
2248            schema_url,
2249            schema_body,
2250        )]);
2251        let c = ValidateArgs {
2252            globs: vec![pattern],
2253            exclude: vec![],
2254            cache_dir: None,
2255            force_schema_fetch: true,
2256            force_validation: true,
2257            no_catalog: true,
2258            config_dir: None,
2259            schema_cache_ttl: None,
2260        };
2261        let result = run_with(&c, Some(client), |_| {}).await?;
2262        assert!(
2263            !result.has_errors(),
2264            "schema URI with fragment should not cause compilation error"
2265        );
2266        assert_eq!(result.files_checked(), 1);
2267        Ok(())
2268    }
2269
2270    #[tokio::test]
2271    async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2272        let tmp = tempfile::tempdir()?;
2273
2274        // Referenced schema with a "name" string definition
2275        std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2276
2277        // Main schema that uses a relative $ref
2278        let schema_path = tmp.path().join("schema.json");
2279        std::fs::write(
2280            &schema_path,
2281            r#"{
2282                "type": "object",
2283                "properties": {
2284                    "name": { "$ref": "./defs.json" }
2285                },
2286                "required": ["name"]
2287            }"#,
2288        )?;
2289
2290        // Valid data file pointing to the local schema
2291        let schema_uri = schema_path.to_string_lossy();
2292        std::fs::write(
2293            tmp.path().join("data.json"),
2294            format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2295        )?;
2296
2297        // Invalid data file (name should be a string per defs.json)
2298        std::fs::write(
2299            tmp.path().join("bad.json"),
2300            format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2301        )?;
2302
2303        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2304        let args = ValidateArgs {
2305            globs: vec![pattern],
2306            exclude: vec![],
2307            cache_dir: None,
2308            force_schema_fetch: true,
2309            force_validation: true,
2310            no_catalog: true,
2311            config_dir: None,
2312            schema_cache_ttl: None,
2313        };
2314        let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2315
2316        // The invalid file should produce an error (name is 42, not a string)
2317        assert!(result.has_errors());
2318        // Exactly one file should have errors (bad.json), the other (data.json) should pass
2319        assert_eq!(result.errors.len(), 1);
2320        Ok(())
2321    }
2322}