Skip to main content

lintel_validate/
validate.rs

1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use lintel_diagnostics::reporter::{CheckResult, CheckedFile};
11use lintel_diagnostics::{DEFAULT_LABEL, LintelDiagnostic, find_instance_path_span, format_label};
12use lintel_schema_cache::{CacheStatus, SchemaCache};
13use lintel_validation_cache::{ValidationCacheStatus, ValidationError, ValidationErrorKind};
14use schema_catalog::{CompiledCatalog, FileFormat};
15
16use crate::catalog;
17use crate::discover;
18use crate::parsers::{self, Parser};
19use crate::registry;
20use crate::suggest;
21
/// Conservative limit for concurrent file reads to avoid exhausting file
/// descriptors. 128 is well below the default soft limit on macOS (256) and
/// Linux (1024) while still providing good throughput.
///
/// Used as the permit count of the semaphore that gates file reads.
const FD_CONCURRENCY_LIMIT: usize = 128;
26
27/// Composite retriever that dispatches `file://` URIs to local disk reads
28/// and everything else to the HTTP-backed [`SchemaCache`].
29struct LocalRetriever {
30    http: SchemaCache,
31}
32
33#[async_trait::async_trait]
34impl jsonschema::AsyncRetrieve for LocalRetriever {
35    async fn retrieve(
36        &self,
37        uri: &jsonschema::Uri<String>,
38    ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
39        let s = uri.as_str();
40        if let Some(raw) = s.strip_prefix("file://") {
41            let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
42            let content = tokio::fs::read_to_string(path.as_ref()).await?;
43            Ok(serde_json::from_str(&content)?)
44        } else {
45            self.http.retrieve(uri).await
46        }
47    }
48}
49
/// Inputs for a validation run: which files to consider, how the schema and
/// validation caches are used, and where to look for `lintel.toml`.
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover)
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable `SchemaStore` catalog matching
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
75
76// ---------------------------------------------------------------------------
77// Internal types
78// ---------------------------------------------------------------------------
79
/// A file that has been parsed and matched to a schema URI.
///
/// For JSONL input one `ParsedFile` is produced per line rather than per file.
struct ParsedFile {
    /// Display path of the source; JSONL lines use the form `path:line_number`.
    path: String,
    /// Source text the instance was parsed from (the raw line for JSONL).
    content: String,
    /// Parsed document as a JSON value.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
88
89// ---------------------------------------------------------------------------
90// Config loading
91// ---------------------------------------------------------------------------
92
93/// Locate `lintel.toml`, load the full config, and return the config directory.
94/// Returns `(config, config_dir, config_path)`.  When no config is found or
95/// cwd is unavailable the config is default and `config_path` is `None`.
96#[tracing::instrument(skip_all)]
97pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
98    let start_dir = match search_dir {
99        Some(d) => d.to_path_buf(),
100        None => match std::env::current_dir() {
101            Ok(d) => d,
102            Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
103        },
104    };
105
106    let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
107        return (lintel_config::Config::default(), start_dir, None);
108    };
109
110    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
111    let cfg = lintel_config::find_and_load(&start_dir)
112        .ok()
113        .flatten()
114        .unwrap_or_default();
115    (cfg, dir, Some(config_path))
116}
117
118// ---------------------------------------------------------------------------
119// File collection
120// ---------------------------------------------------------------------------
121
122/// Collect input files from globs/directories, applying exclude filters.
123///
124/// # Errors
125///
126/// Returns an error if a glob pattern is invalid or a directory cannot be walked.
127#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
128pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
129    if globs.is_empty() {
130        return discover::discover_files(".", exclude);
131    }
132
133    let mut result = Vec::new();
134    for pattern in globs {
135        let path = Path::new(pattern);
136        if path.is_dir() {
137            result.extend(discover::discover_files(pattern, exclude)?);
138        } else {
139            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
140                let path = entry?;
141                if path.is_file() && !is_excluded(&path, exclude) {
142                    result.push(path);
143                }
144            }
145        }
146    }
147    Ok(result)
148}
149
150fn is_excluded(path: &Path, excludes: &[String]) -> bool {
151    let path_str = match path.to_str() {
152        Some(s) => s.strip_prefix("./").unwrap_or(s),
153        None => return false,
154    };
155    excludes
156        .iter()
157        .any(|pattern| glob_match::glob_match(pattern, path_str))
158}
159
160// ---------------------------------------------------------------------------
161// Phase 1: Parse files and resolve schema URIs
162// ---------------------------------------------------------------------------
163
164/// Try parsing content with each known format, returning the first success.
165///
166/// JSONC is tried first (superset of JSON, handles comments), then YAML and
167/// TOML which cover the most common config formats, followed by the rest.
168pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
169    use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
170    const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
171
172    for fmt in FORMATS {
173        let parser = parsers::parser_for(fmt);
174        if let Ok(val) = parser.parse(content, file_name) {
175            return Some((fmt, val));
176        }
177    }
178    None
179}
180
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// The content parsed successfully and a schema was resolved for it.
    Parsed {
        /// Schema URI after rewrite rules and local-path resolution.
        schema_uri: String,
        /// The parsed document together with its source text.
        parsed: ParsedFile,
    },
    /// A diagnostic raised while processing (e.g. a parse error or a JSONL
    /// schema mismatch).
    Error(LintelDiagnostic),
    /// Nothing to validate for this input (e.g. no schema applies to it).
    Skip,
}
191
192/// Resolve a relative local schema path against a base directory.
193///
194/// Remote URIs (http/https) are returned unchanged. For local paths, joins with
195/// the provided base directory (file's parent for inline `$schema`, config dir
196/// for config/catalog sources).
197fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
198    if schema_uri.starts_with("http://") || schema_uri.starts_with("https://") {
199        return schema_uri.to_string();
200    }
201    if let Some(dir) = base_dir {
202        normalize_path(&dir.join(schema_uri))
203            .to_string_lossy()
204            .to_string()
205    } else {
206        schema_uri.to_string()
207    }
208}
209
/// Lexically normalize a path by resolving `.` and `..` components without
/// touching the filesystem (unlike `std::fs::canonicalize`).
///
/// A `..` removes the preceding normal component. When there is nothing left
/// to remove it is kept (`../a` stays `../a`, `a/../../b` becomes `../b`)
/// instead of being silently dropped, which would point at a different file.
/// A `..` directly under the root is discarded (`/../a` becomes `/a`).
/// Symlinks are not taken into account.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut out = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match out.components().next_back() {
                // Ordinary case: `a/b/..` collapses to `a`.
                Some(Component::Normal(_)) => {
                    out.pop();
                }
                // The root is its own parent.
                Some(Component::RootDir) => {}
                // Empty so far, or already climbing (`..`): the `..` cannot be
                // resolved lexically, so it must be preserved.
                _ => out.push(".."),
            },
            other => out.push(other),
        }
    }
    out
}
225
226/// Process a single file's already-read content: parse and resolve schema URI.
227///
228/// Returns a `Vec` because JSONL files expand to one result per non-empty line.
229#[allow(clippy::too_many_arguments)]
230fn process_one_file(
231    path: &Path,
232    content: String,
233    config: &lintel_config::Config,
234    config_dir: &Path,
235    compiled_catalogs: &[CompiledCatalog],
236) -> Vec<FileResult> {
237    let path_str = path.display().to_string();
238    let file_name = path
239        .file_name()
240        .and_then(|n| n.to_str())
241        .unwrap_or(&path_str);
242
243    let detected_format = parsers::detect_format(path);
244
245    // JSONL files get special per-line handling.
246    if detected_format == Some(FileFormat::Jsonl) {
247        return process_jsonl_file(
248            path,
249            &path_str,
250            file_name,
251            &content,
252            config,
253            config_dir,
254            compiled_catalogs,
255        );
256    }
257
258    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
259    if detected_format.is_none() {
260        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
261            || compiled_catalogs
262                .iter()
263                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
264        if !has_match {
265            return vec![FileResult::Skip];
266        }
267    }
268
269    // Parse the file content.
270    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
271        let parser = parsers::parser_for(fmt);
272        match parser.parse(&content, &path_str) {
273            Ok(val) => (parser, val),
274            Err(parse_err) => return vec![FileResult::Error(parse_err)],
275        }
276    } else {
277        match try_parse_all(&content, &path_str) {
278            Some((fmt, val)) => (parsers::parser_for(fmt), val),
279            None => return vec![FileResult::Skip],
280        }
281    };
282
283    // Skip markdown files with no frontmatter
284    if instance.is_null() {
285        return vec![FileResult::Skip];
286    }
287
288    // Schema resolution priority:
289    // 1. Inline $schema / YAML modeline (always wins)
290    // 2. Custom schema mappings from lintel.toml [schemas]
291    // 3. Catalog matching (custom registries > Lintel catalog > SchemaStore)
292    //
293    // Track whether the URI came from inline $schema (resolve relative to file)
294    // or from config/catalog (resolve relative to config dir).
295    let inline_uri = parser.extract_schema_uri(&content, &instance);
296    let from_inline = inline_uri.is_some();
297    let schema_uri = inline_uri
298        .or_else(|| {
299            config
300                .find_schema_mapping(&path_str, file_name)
301                .map(str::to_string)
302        })
303        .or_else(|| {
304            compiled_catalogs
305                .iter()
306                .find_map(|cat| cat.find_schema(&path_str, file_name))
307                .map(str::to_string)
308        });
309
310    let Some(schema_uri) = schema_uri else {
311        return vec![FileResult::Skip];
312    };
313
314    // Keep original URI for override matching (before rewrites)
315    let original_schema_uri = schema_uri.clone();
316
317    // Apply rewrite rules, then resolve // paths relative to lintel.toml
318    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
319    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
320
321    // Resolve relative local paths:
322    // - Inline $schema: relative to the file's parent directory
323    // - Config/catalog: relative to the config directory (where lintel.toml lives)
324    let schema_uri = resolve_local_schema_path(
325        &schema_uri,
326        if from_inline {
327            path.parent()
328        } else {
329            Some(config_dir)
330        },
331    );
332
333    vec![FileResult::Parsed {
334        schema_uri,
335        parsed: ParsedFile {
336            path: path_str,
337            content,
338            instance,
339            original_schema_uri,
340        },
341    }]
342}
343
344/// Process a JSONL file: parse each line independently and resolve schemas.
345///
346/// Each non-empty line becomes its own [`FileResult::Parsed`]. Schema resolution
347/// priority per line: inline `$schema` on the line > config mapping > catalog.
348///
349/// Also checks schema consistency across lines — mismatches are emitted as
350/// [`FileResult::Error`] so they flow through the normal Reporter pipeline.
351#[allow(clippy::too_many_arguments)]
352fn process_jsonl_file(
353    path: &Path,
354    path_str: &str,
355    file_name: &str,
356    content: &str,
357    config: &lintel_config::Config,
358    config_dir: &Path,
359    compiled_catalogs: &[CompiledCatalog],
360) -> Vec<FileResult> {
361    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
362        Ok(lines) => lines,
363        Err(parse_err) => return vec![FileResult::Error(parse_err)],
364    };
365
366    if lines.is_empty() {
367        return vec![FileResult::Skip];
368    }
369
370    let mut results = Vec::with_capacity(lines.len());
371
372    // Check schema consistency before consuming lines.
373    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
374        for m in mismatches {
375            results.push(FileResult::Error(LintelDiagnostic::SchemaMismatch {
376                path: path_str.to_string(),
377                line_number: m.line_number,
378                message: format!("expected consistent $schema but found {}", m.schema_uri),
379            }));
380        }
381    }
382
383    for line in lines {
384        // Schema resolution: inline $schema on line > config > catalog
385        // Track source to resolve relative paths correctly.
386        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
387        let from_inline = inline_uri.is_some();
388        let schema_uri = inline_uri
389            .or_else(|| {
390                config
391                    .find_schema_mapping(path_str, file_name)
392                    .map(str::to_string)
393            })
394            .or_else(|| {
395                compiled_catalogs
396                    .iter()
397                    .find_map(|cat| cat.find_schema(path_str, file_name))
398                    .map(str::to_string)
399            });
400
401        let Some(schema_uri) = schema_uri else {
402            continue;
403        };
404
405        let original_schema_uri = schema_uri.clone();
406
407        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
408        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
409
410        // Inline $schema: relative to file's parent. Config/catalog: relative to config dir.
411        let schema_uri = resolve_local_schema_path(
412            &schema_uri,
413            if from_inline {
414                path.parent()
415            } else {
416                Some(config_dir)
417            },
418        );
419
420        let line_path = format!("{path_str}:{}", line.line_number);
421
422        results.push(FileResult::Parsed {
423            schema_uri,
424            parsed: ParsedFile {
425                path: line_path,
426                content: line.raw,
427                instance: line.value,
428                original_schema_uri,
429            },
430        });
431    }
432
433    if results.is_empty() {
434        vec![FileResult::Skip]
435    } else {
436        results
437    }
438}
439
440/// Read files concurrently with tokio, using a semaphore to avoid exhausting
441/// file descriptors. I/O errors are pushed as `LintelDiagnostic::Io`.
442///
443/// # Panics
444///
445/// Panics if the internal semaphore is unexpectedly closed (should not happen).
446#[tracing::instrument(skip_all, fields(file_count = files.len()))]
447pub async fn read_files(
448    files: &[PathBuf],
449    errors: &mut Vec<LintelDiagnostic>,
450) -> Vec<(PathBuf, String)> {
451    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
452    let mut read_set = tokio::task::JoinSet::new();
453    for path in files {
454        let path = path.clone();
455        let sem = semaphore.clone();
456        read_set.spawn(async move {
457            let _permit = sem.acquire().await.expect("semaphore closed");
458            let result = tokio::fs::read_to_string(&path).await;
459            (path, result)
460        });
461    }
462
463    let mut file_contents = Vec::with_capacity(files.len());
464    while let Some(result) = read_set.join_next().await {
465        match result {
466            Ok((path, Ok(content))) => file_contents.push((path, content)),
467            Ok((path, Err(e))) => {
468                errors.push(LintelDiagnostic::Io {
469                    path: path.display().to_string(),
470                    message: format!("failed to read: {e}"),
471                });
472            }
473            Err(e) => tracing::warn!("file read task panicked: {e}"),
474        }
475    }
476
477    file_contents
478}
479
480/// Parse pre-read file contents, extract schema URIs, apply rewrites, and
481/// group by resolved schema URI.
482#[tracing::instrument(skip_all, fields(file_count = file_contents.len()))]
483#[allow(clippy::too_many_arguments)]
484fn parse_and_group_contents(
485    file_contents: Vec<(PathBuf, String)>,
486    config: &lintel_config::Config,
487    config_dir: &Path,
488    compiled_catalogs: &[CompiledCatalog],
489    errors: &mut Vec<LintelDiagnostic>,
490) -> BTreeMap<String, Vec<ParsedFile>> {
491    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
492    for (path, content) in file_contents {
493        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
494        for result in results {
495            match result {
496                FileResult::Parsed { schema_uri, parsed } => {
497                    schema_groups.entry(schema_uri).or_default().push(parsed);
498                }
499                FileResult::Error(e) => errors.push(e),
500                FileResult::Skip => {}
501            }
502        }
503    }
504
505    schema_groups
506}
507
508// ---------------------------------------------------------------------------
509// Phase 2: Schema fetching, compilation, and instance validation
510// ---------------------------------------------------------------------------
511
512/// Fetch a schema by URI, returning its parsed JSON and cache status.
513///
514/// For remote URIs, checks the prefetched map first; for local URIs, reads
515/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
516#[allow(clippy::too_many_arguments)]
517async fn fetch_schema_from_prefetched(
518    schema_uri: &str,
519    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
520    local_cache: &mut HashMap<String, Value>,
521    group: &[ParsedFile],
522    errors: &mut Vec<LintelDiagnostic>,
523    checked: &mut Vec<CheckedFile>,
524    on_check: &mut impl FnMut(&CheckedFile),
525) -> Option<(Value, Option<CacheStatus>)> {
526    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
527
528    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
529        match prefetched.get(schema_uri) {
530            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
531            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
532            None => Err(format!("schema not prefetched: {schema_uri}")),
533        }
534    } else if let Some(cached) = local_cache.get(schema_uri) {
535        Ok((cached.clone(), None))
536    } else {
537        tokio::fs::read_to_string(schema_uri)
538            .await
539            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
540            .and_then(|content| {
541                serde_json::from_str::<Value>(&content)
542                    .map(|v| {
543                        local_cache.insert(schema_uri.to_string(), v.clone());
544                        (v, None)
545                    })
546                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
547            })
548    };
549
550    match result {
551        Ok(value) => Some(value),
552        Err(message) => {
553            report_group_error(
554                |path| LintelDiagnostic::SchemaFetch {
555                    path: path.to_string(),
556                    message: message.clone(),
557                },
558                schema_uri,
559                None,
560                group,
561                errors,
562                checked,
563                on_check,
564            );
565            None
566        }
567    }
568}
569
570/// Report the same error for every file in a schema group.
571#[allow(clippy::too_many_arguments)]
572fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
573    make_error: impl Fn(&str) -> LintelDiagnostic,
574    schema_uri: &str,
575    cache_status: Option<CacheStatus>,
576    group: &[P],
577    errors: &mut Vec<LintelDiagnostic>,
578    checked: &mut Vec<CheckedFile>,
579    on_check: &mut impl FnMut(&CheckedFile),
580) {
581    for item in group {
582        let pf = item.borrow();
583        let cf = CheckedFile {
584            path: pf.path.clone(),
585            schema: schema_uri.to_string(),
586            cache_status,
587            validation_cache_status: None,
588        };
589        on_check(&cf);
590        checked.push(cf);
591        errors.push(make_error(&pf.path));
592    }
593}
594
595/// Mark every file in a group as checked (no errors).
596#[allow(clippy::too_many_arguments)]
597fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
598    schema_uri: &str,
599    cache_status: Option<CacheStatus>,
600    validation_cache_status: Option<ValidationCacheStatus>,
601    group: &[P],
602    checked: &mut Vec<CheckedFile>,
603    on_check: &mut impl FnMut(&CheckedFile),
604) {
605    for item in group {
606        let pf = item.borrow();
607        let cf = CheckedFile {
608            path: pf.path.clone(),
609            schema: schema_uri.to_string(),
610            cache_status,
611            validation_cache_status,
612        };
613        on_check(&cf);
614        checked.push(cf);
615    }
616}
617
618/// Convert [`ValidationError`]s into [`LintelDiagnostic::Validation`] diagnostics.
619#[allow(clippy::too_many_arguments)]
620fn push_validation_errors(
621    pf: &ParsedFile,
622    schema_url: &str,
623    validation_errors: &[ValidationError],
624    errors: &mut Vec<LintelDiagnostic>,
625    schema: Option<&Value>,
626) {
627    for ve in validation_errors {
628        let instance_path = if ve.instance_path.is_empty() {
629            DEFAULT_LABEL.to_string()
630        } else {
631            ve.instance_path.clone()
632        };
633        let label = format_label(&instance_path, &ve.schema_path);
634        let source_span: miette::SourceSpan = ve.span.into();
635        let mut message = ve.kind.message();
636        if let ValidationErrorKind::AdditionalProperty { ref property } = ve.kind
637            && let Some(s) = schema
638            && let Some(suggestion) = suggest::suggest_property(property, &ve.schema_path, s)
639        {
640            message = format!("{message}; did you mean '{suggestion}'?");
641        }
642        errors.push(LintelDiagnostic::Validation {
643            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
644            span: source_span,
645            schema_span: source_span,
646            path: pf.path.clone(),
647            instance_path,
648            label,
649            message,
650            schema_url: schema_url.to_string(),
651            schema_path: ve.schema_path.clone(),
652        });
653    }
654}
655
656/// Map a `jsonschema::error::ValidationErrorKind` to our serializable
657/// [`ValidationErrorKind`]. `AdditionalProperties` is handled separately
658/// in [`convert_error`].
659fn convert_kind(kind: &jsonschema::error::ValidationErrorKind) -> ValidationErrorKind {
660    use jsonschema::error::{TypeKind, ValidationErrorKind as JK};
661
662    match kind {
663        JK::AdditionalItems { limit } => ValidationErrorKind::AdditionalItems { limit: *limit },
664        JK::AdditionalProperties { .. } => unreachable!("handled in convert_error"),
665        JK::AnyOf { .. } => ValidationErrorKind::AnyOf,
666        JK::BacktrackLimitExceeded { error } => ValidationErrorKind::BacktrackLimitExceeded {
667            message: error.to_string(),
668        },
669        JK::Constant { expected_value } => ValidationErrorKind::Constant {
670            expected_value: expected_value.clone(),
671        },
672        JK::Contains => ValidationErrorKind::Contains,
673        JK::ContentEncoding { content_encoding } => ValidationErrorKind::ContentEncoding {
674            content_encoding: content_encoding.clone(),
675        },
676        JK::ContentMediaType { content_media_type } => ValidationErrorKind::ContentMediaType {
677            content_media_type: content_media_type.clone(),
678        },
679        JK::Custom { keyword, message } => ValidationErrorKind::Custom {
680            keyword: keyword.clone(),
681            message: message.clone(),
682        },
683        JK::Enum { options } => ValidationErrorKind::Enum {
684            options: options.clone(),
685        },
686        JK::ExclusiveMaximum { limit } => ValidationErrorKind::ExclusiveMaximum {
687            limit: limit.clone(),
688        },
689        JK::ExclusiveMinimum { limit } => ValidationErrorKind::ExclusiveMinimum {
690            limit: limit.clone(),
691        },
692        JK::FalseSchema => ValidationErrorKind::FalseSchema,
693        JK::Format { format } => ValidationErrorKind::Format {
694            format: format.clone(),
695        },
696        JK::FromUtf8 { error } => ValidationErrorKind::FromUtf8 {
697            message: error.to_string(),
698        },
699        JK::MaxItems { limit } => ValidationErrorKind::MaxItems { limit: *limit },
700        JK::Maximum { limit } => ValidationErrorKind::Maximum {
701            limit: limit.clone(),
702        },
703        JK::MaxLength { limit } => ValidationErrorKind::MaxLength { limit: *limit },
704        JK::MaxProperties { limit } => ValidationErrorKind::MaxProperties { limit: *limit },
705        JK::MinItems { limit } => ValidationErrorKind::MinItems { limit: *limit },
706        JK::Minimum { limit } => ValidationErrorKind::Minimum {
707            limit: limit.clone(),
708        },
709        JK::MinLength { limit } => ValidationErrorKind::MinLength { limit: *limit },
710        JK::MinProperties { limit } => ValidationErrorKind::MinProperties { limit: *limit },
711        JK::MultipleOf { multiple_of } => ValidationErrorKind::MultipleOf {
712            multiple_of: *multiple_of,
713        },
714        JK::Not { .. } => ValidationErrorKind::Not,
715        JK::OneOfMultipleValid { .. } => ValidationErrorKind::OneOfMultipleValid,
716        JK::OneOfNotValid { .. } => ValidationErrorKind::OneOfNotValid,
717        JK::Pattern { pattern } => ValidationErrorKind::Pattern {
718            pattern: pattern.clone(),
719        },
720        JK::PropertyNames { error } => ValidationErrorKind::PropertyNames {
721            message: error.to_string(),
722        },
723        JK::Required { property } => ValidationErrorKind::Required {
724            property: match property {
725                Value::String(s) => format!("\"{s}\""),
726                other => other.to_string(),
727            },
728        },
729        JK::Type { kind } => {
730            let expected = match kind {
731                TypeKind::Single(t) => t.to_string(),
732                TypeKind::Multiple(ts) => {
733                    let parts: Vec<String> = ts.iter().map(|t| t.to_string()).collect();
734                    parts.join(", ")
735                }
736            };
737            ValidationErrorKind::Type { expected }
738        }
739        JK::UnevaluatedItems { unexpected } => ValidationErrorKind::UnevaluatedItems {
740            unexpected: unexpected.clone(),
741        },
742        JK::UnevaluatedProperties { unexpected } => ValidationErrorKind::UnevaluatedProperties {
743            unexpected: unexpected.clone(),
744        },
745        JK::UniqueItems => ValidationErrorKind::UniqueItems,
746        JK::Referencing(err) => ValidationErrorKind::Referencing {
747            message: err.to_string(),
748        },
749    }
750}
751
752/// Convert a single `jsonschema::ValidationError` into one or more typed
753/// [`ValidationError`]s with pre-computed spans.
754///
755/// `AdditionalProperties` errors are split into one per unexpected property.
756fn convert_error(error: &jsonschema::ValidationError<'_>, content: &str) -> Vec<ValidationError> {
757    use jsonschema::error::ValidationErrorKind as JK;
758
759    let schema_path = error.schema_path().to_string();
760    let base_instance_path = error.instance_path().to_string();
761
762    if let JK::AdditionalProperties { unexpected } = error.kind() {
763        return unexpected
764            .iter()
765            .map(|prop| {
766                let instance_path = format!("{base_instance_path}/{prop}");
767                let span = find_instance_path_span(content, &instance_path);
768                ValidationError {
769                    instance_path,
770                    schema_path: schema_path.clone(),
771                    kind: ValidationErrorKind::AdditionalProperty {
772                        property: prop.clone(),
773                    },
774                    span,
775                }
776            })
777            .collect();
778    }
779
780    let span = find_instance_path_span(content, &base_instance_path);
781    vec![ValidationError {
782        instance_path: base_instance_path,
783        schema_path,
784        kind: convert_kind(error.kind()),
785        span,
786    }]
787}
788
789/// Validate all files in a group against an already-compiled validator and store
790/// results in the validation cache.
791#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
792#[allow(clippy::too_many_arguments)]
793async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
794    validator: &jsonschema::Validator,
795    schema_uri: &str,
796    schema_hash: &str,
797    validate_formats: bool,
798    cache_status: Option<CacheStatus>,
799    group: &[P],
800    schema_value: &Value,
801    vcache: &lintel_validation_cache::ValidationCache,
802    errors: &mut Vec<LintelDiagnostic>,
803    checked: &mut Vec<CheckedFile>,
804    on_check: &mut impl FnMut(&CheckedFile),
805) {
806    for item in group {
807        let pf = item.borrow();
808        let file_errors: Vec<ValidationError> = validator
809            .iter_errors(&pf.instance)
810            .flat_map(|error| convert_error(&error, &pf.content))
811            .collect();
812
813        vcache
814            .store(
815                &lintel_validation_cache::CacheKey {
816                    file_content: &pf.content,
817                    schema_hash,
818                    validate_formats,
819                },
820                &file_errors,
821            )
822            .await;
823        push_validation_errors(pf, schema_uri, &file_errors, errors, Some(schema_value));
824
825        let cf = CheckedFile {
826            path: pf.path.clone(),
827            schema: schema_uri.to_string(),
828            cache_status,
829            validation_cache_status: Some(ValidationCacheStatus::Miss),
830        };
831        on_check(&cf);
832        checked.push(cf);
833    }
834}
835
836// ---------------------------------------------------------------------------
837// Public API
838// ---------------------------------------------------------------------------
839
840/// Fetch and compile all schema catalogs (default, `SchemaStore`, and custom registries).
841///
842/// Returns a list of compiled catalogs, printing warnings for any that fail to fetch.
843pub async fn fetch_compiled_catalogs(
844    retriever: &SchemaCache,
845    config: &lintel_config::Config,
846    no_catalog: bool,
847) -> Vec<CompiledCatalog> {
848    let mut compiled_catalogs = Vec::new();
849
850    if !no_catalog {
851        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
852
853        // Catalogs are fetched concurrently but sorted by priority so that
854        // the Lintel catalog wins over custom registries, which win over
855        // SchemaStore.  The `order` field encodes this precedence.
856        #[allow(clippy::items_after_statements)]
857        type CatalogResult = (
858            usize, // priority (lower = higher precedence)
859            String,
860            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
861        );
862        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
863
864        // Custom registries from lintel.toml (highest precedence among catalogs)
865        for (i, registry_url) in config.registries.iter().enumerate() {
866            let r = retriever.clone();
867            let url = registry_url.clone();
868            let label = format!("registry {url}");
869            catalog_tasks.spawn(async move {
870                let result = registry::fetch(&r, &url)
871                    .await
872                    .map(|cat| CompiledCatalog::compile(&cat));
873                (i, label, result)
874            });
875        }
876
877        // Lintel catalog
878        let lintel_order = config.registries.len();
879        if !config.no_default_catalog {
880            let r = retriever.clone();
881            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
882            catalog_tasks.spawn(async move {
883                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
884                    .await
885                    .map(|cat| CompiledCatalog::compile(&cat));
886                (lintel_order, label, result)
887            });
888        }
889
890        // SchemaStore catalog (lowest precedence)
891        let schemastore_order = config.registries.len() + 1;
892        let r = retriever.clone();
893        catalog_tasks.spawn(async move {
894            let result = catalog::fetch_catalog(&r)
895                .await
896                .map(|cat| CompiledCatalog::compile(&cat));
897            (schemastore_order, "SchemaStore catalog".to_string(), result)
898        });
899
900        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
901        while let Some(result) = catalog_tasks.join_next().await {
902            match result {
903                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
904                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
905                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
906            }
907        }
908        results.sort_by_key(|(order, _)| *order);
909        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));
910
911        drop(catalog_span);
912    }
913
914    compiled_catalogs
915}
916
917/// # Errors
918///
919/// Returns an error if file collection or schema validation encounters an I/O error.
920pub async fn run(args: &ValidateArgs) -> Result<CheckResult> {
921    run_with(args, None, |_| {}).await
922}
923
924/// Like [`run`], but calls `on_check` each time a file is checked, allowing
925/// callers to stream progress (e.g. verbose output) as files are processed.
926///
927/// # Errors
928///
929/// Returns an error if file collection or schema validation encounters an I/O error.
930#[tracing::instrument(skip_all, name = "validate")]
931pub async fn run_with(
932    args: &ValidateArgs,
933    cache: Option<SchemaCache>,
934    mut on_check: impl FnMut(&CheckedFile),
935) -> Result<CheckResult> {
936    let retriever = build_retriever(args, cache);
937    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
938    let files = collect_files(&args.globs, &args.exclude)?;
939    tracing::info!(file_count = files.len(), "collected files");
940
941    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
942
943    let mut errors: Vec<LintelDiagnostic> = Vec::new();
944    let file_contents = read_files(&files, &mut errors).await;
945
946    run_with_contents_inner(
947        file_contents,
948        args,
949        retriever,
950        config,
951        &config_dir,
952        compiled_catalogs,
953        errors,
954        &mut on_check,
955    )
956    .await
957}
958
959/// Like [`run_with`], but accepts pre-read file contents instead of reading
960/// from disk. Use this when the caller has already read files (e.g. to share
961/// reads between format checking and validation).
962///
963/// # Errors
964///
965/// Returns an error if schema validation encounters an I/O or network error.
966pub async fn run_with_contents(
967    args: &ValidateArgs,
968    file_contents: Vec<(PathBuf, String)>,
969    cache: Option<SchemaCache>,
970    mut on_check: impl FnMut(&CheckedFile),
971) -> Result<CheckResult> {
972    let retriever = build_retriever(args, cache);
973    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
974    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
975    let errors: Vec<LintelDiagnostic> = Vec::new();
976
977    run_with_contents_inner(
978        file_contents,
979        args,
980        retriever,
981        config,
982        &config_dir,
983        compiled_catalogs,
984        errors,
985        &mut on_check,
986    )
987    .await
988}
989
990fn build_retriever(args: &ValidateArgs, cache: Option<SchemaCache>) -> SchemaCache {
991    if let Some(c) = cache {
992        return c;
993    }
994    let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
995    if let Some(dir) = &args.cache_dir {
996        let path = PathBuf::from(dir);
997        let _ = fs::create_dir_all(&path);
998        builder = builder.cache_dir(path);
999    }
1000    if let Some(ttl) = args.schema_cache_ttl {
1001        builder = builder.ttl(ttl);
1002    }
1003    builder.build()
1004}
1005
1006#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
1007async fn run_with_contents_inner(
1008    file_contents: Vec<(PathBuf, String)>,
1009    args: &ValidateArgs,
1010    retriever: SchemaCache,
1011    config: lintel_config::Config,
1012    config_dir: &Path,
1013    compiled_catalogs: Vec<CompiledCatalog>,
1014    mut errors: Vec<LintelDiagnostic>,
1015    on_check: &mut impl FnMut(&CheckedFile),
1016) -> Result<CheckResult> {
1017    let mut checked: Vec<CheckedFile> = Vec::new();
1018
1019    // Phase 1: Parse files and resolve schema URIs
1020    let schema_groups = parse_and_group_contents(
1021        file_contents,
1022        &config,
1023        config_dir,
1024        &compiled_catalogs,
1025        &mut errors,
1026    );
1027    tracing::info!(
1028        schema_count = schema_groups.len(),
1029        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
1030        "grouped files by schema"
1031    );
1032
1033    // Create validation cache
1034    let vcache = lintel_validation_cache::ValidationCache::new(
1035        lintel_validation_cache::ensure_cache_dir(),
1036        args.force_validation,
1037    );
1038
1039    // Prefetch all remote schemas in parallel
1040    let remote_uris: Vec<&String> = schema_groups
1041        .keys()
1042        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
1043        .collect();
1044
1045    let prefetched = {
1046        let _prefetch_span =
1047            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
1048
1049        let mut schema_tasks = tokio::task::JoinSet::new();
1050        for uri in remote_uris {
1051            let r = retriever.clone();
1052            let u = uri.clone();
1053            schema_tasks.spawn(async move {
1054                let result = r.fetch(&u).await;
1055                (u, result)
1056            });
1057        }
1058
1059        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
1060        while let Some(result) = schema_tasks.join_next().await {
1061            match result {
1062                Ok((uri, fetch_result)) => {
1063                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
1064                }
1065                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
1066            }
1067        }
1068
1069        prefetched
1070    };
1071
1072    // Phase 2: Compile each schema once and validate all matching files
1073    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
1074    let mut fetch_time = core::time::Duration::ZERO;
1075    let mut hash_time = core::time::Duration::ZERO;
1076    let mut vcache_time = core::time::Duration::ZERO;
1077    let mut compile_time = core::time::Duration::ZERO;
1078    let mut validate_time = core::time::Duration::ZERO;
1079
1080    for (schema_uri, group) in &schema_groups {
1081        let _group_span = tracing::debug_span!(
1082            "schema_group",
1083            schema = schema_uri.as_str(),
1084            files = group.len(),
1085        )
1086        .entered();
1087
1088        // If ANY file in the group matches a `validate_formats = false` override,
1089        // disable format validation for the whole group (they share one compiled validator).
1090        let validate_formats = group.iter().all(|pf| {
1091            config
1092                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
1093        });
1094
1095        // Remote schemas were prefetched in parallel above; local schemas are
1096        // read from disk here (with in-memory caching).
1097        let t = std::time::Instant::now();
1098        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
1099            schema_uri,
1100            &prefetched,
1101            &mut local_schema_cache,
1102            group,
1103            &mut errors,
1104            &mut checked,
1105            on_check,
1106        )
1107        .await
1108        else {
1109            fetch_time += t.elapsed();
1110            continue;
1111        };
1112        fetch_time += t.elapsed();
1113
1114        // Pre-compute schema hash once for the entire group.
1115        let t = std::time::Instant::now();
1116        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
1117        hash_time += t.elapsed();
1118
1119        // Split the group into validation cache hits and misses.
1120        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
1121
1122        let t = std::time::Instant::now();
1123        for pf in group {
1124            let (cached, vcache_status) = vcache
1125                .lookup(&lintel_validation_cache::CacheKey {
1126                    file_content: &pf.content,
1127                    schema_hash: &schema_hash,
1128                    validate_formats,
1129                })
1130                .await;
1131
1132            if let Some(cached_errors) = cached {
1133                push_validation_errors(
1134                    pf,
1135                    schema_uri,
1136                    &cached_errors,
1137                    &mut errors,
1138                    Some(&schema_value),
1139                );
1140                let cf = CheckedFile {
1141                    path: pf.path.clone(),
1142                    schema: schema_uri.clone(),
1143                    cache_status,
1144                    validation_cache_status: Some(vcache_status),
1145                };
1146                on_check(&cf);
1147                checked.push(cf);
1148            } else {
1149                cache_misses.push(pf);
1150            }
1151        }
1152        vcache_time += t.elapsed();
1153
1154        tracing::debug!(
1155            cache_hits = group.len() - cache_misses.len(),
1156            cache_misses = cache_misses.len(),
1157            "validation cache"
1158        );
1159
1160        // If all files hit the validation cache, skip schema compilation entirely.
1161        if cache_misses.is_empty() {
1162            continue;
1163        }
1164
1165        // Compile the schema for cache misses.
1166        let t = std::time::Instant::now();
1167        let validator = {
1168            // Set base URI so relative $ref values (e.g. "./rule.json") resolve
1169            // correctly. Remote schemas use the HTTP URI directly; local schemas
1170            // get a file:// URI derived from the canonical absolute path.
1171            let is_remote_schema =
1172                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
1173            let local_retriever = LocalRetriever {
1174                http: retriever.clone(),
1175            };
1176            let opts = jsonschema::async_options()
1177                .with_retriever(local_retriever)
1178                .should_validate_formats(validate_formats);
1179            let base_uri = if is_remote_schema {
1180                // Strip fragment (e.g. "#") — base URIs must not contain fragments.
1181                let uri = match schema_uri.find('#') {
1182                    Some(pos) => schema_uri[..pos].to_string(),
1183                    None => schema_uri.clone(),
1184                };
1185                Some(uri)
1186            } else {
1187                std::fs::canonicalize(schema_uri)
1188                    .ok()
1189                    .map(|p| format!("file://{}", p.display()))
1190            };
1191            let opts = if let Some(uri) = base_uri {
1192                opts.with_base_uri(uri)
1193            } else {
1194                opts
1195            };
1196            match opts.build(&schema_value).await {
1197                Ok(v) => v,
1198                Err(e) => {
1199                    compile_time += t.elapsed();
1200                    // When format validation is disabled and the compilation error
1201                    // is a uri-reference issue (e.g. Rust-style $ref paths in
1202                    // vector.json), skip validation silently.
1203                    if !validate_formats && e.to_string().contains("uri-reference") {
1204                        mark_group_checked(
1205                            schema_uri,
1206                            cache_status,
1207                            Some(ValidationCacheStatus::Miss),
1208                            &cache_misses,
1209                            &mut checked,
1210                            on_check,
1211                        );
1212                        continue;
1213                    }
1214                    let msg = format!("failed to compile schema: {e}");
1215                    report_group_error(
1216                        |path| LintelDiagnostic::SchemaCompile {
1217                            path: path.to_string(),
1218                            message: msg.clone(),
1219                        },
1220                        schema_uri,
1221                        cache_status,
1222                        &cache_misses,
1223                        &mut errors,
1224                        &mut checked,
1225                        on_check,
1226                    );
1227                    continue;
1228                }
1229            }
1230        };
1231        compile_time += t.elapsed();
1232
1233        let t = std::time::Instant::now();
1234        validate_group(
1235            &validator,
1236            schema_uri,
1237            &schema_hash,
1238            validate_formats,
1239            cache_status,
1240            &cache_misses,
1241            &schema_value,
1242            &vcache,
1243            &mut errors,
1244            &mut checked,
1245            on_check,
1246        )
1247        .await;
1248        validate_time += t.elapsed();
1249    }
1250
1251    #[allow(clippy::cast_possible_truncation)]
1252    {
1253        tracing::info!(
1254            fetch_ms = fetch_time.as_millis() as u64,
1255            hash_ms = hash_time.as_millis() as u64,
1256            vcache_ms = vcache_time.as_millis() as u64,
1257            compile_ms = compile_time.as_millis() as u64,
1258            validate_ms = validate_time.as_millis() as u64,
1259            "phase2 breakdown"
1260        );
1261    }
1262
1263    // Sort errors for deterministic output (by path, then by span offset)
1264    errors.sort_by(|a, b| {
1265        a.path()
1266            .cmp(b.path())
1267            .then_with(|| a.offset().cmp(&b.offset()))
1268    });
1269
1270    Ok(CheckResult { errors, checked })
1271}
1272
1273#[cfg(test)]
1274mod tests {
1275    use super::*;
1276    use lintel_schema_cache::SchemaCache;
1277    use std::path::Path;
1278
1279    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1280        let cache = SchemaCache::memory();
1281        for (uri, body) in entries {
1282            cache.insert(
1283                uri,
1284                serde_json::from_str(body).expect("test mock: invalid JSON"),
1285            );
1286        }
1287        cache
1288    }
1289
1290    fn testdata() -> PathBuf {
1291        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1292    }
1293
1294    /// Build glob patterns that scan one or more testdata directories for all supported file types.
1295    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1296        dirs.iter()
1297            .flat_map(|dir| {
1298                let base = testdata().join(dir);
1299                vec![
1300                    base.join("*.json").to_string_lossy().to_string(),
1301                    base.join("*.yaml").to_string_lossy().to_string(),
1302                    base.join("*.yml").to_string_lossy().to_string(),
1303                    base.join("*.json5").to_string_lossy().to_string(),
1304                    base.join("*.jsonc").to_string_lossy().to_string(),
1305                    base.join("*.toml").to_string_lossy().to_string(),
1306                ]
1307            })
1308            .collect()
1309    }
1310
1311    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1312        ValidateArgs {
1313            globs: scenario_globs(dirs),
1314            exclude: vec![],
1315            cache_dir: None,
1316            force_schema_fetch: true,
1317            force_validation: true,
1318            no_catalog: true,
1319            config_dir: None,
1320            schema_cache_ttl: None,
1321        }
1322    }
1323
    /// JSON Schema used by most tests: an object with a required string
    /// property `name`.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1326
1327    fn schema_mock() -> SchemaCache {
1328        mock(&[("https://example.com/schema.json", SCHEMA)])
1329    }
1330
1331    // --- Directory scanning tests ---
1332
1333    #[tokio::test]
1334    async fn no_matching_files() -> anyhow::Result<()> {
1335        let tmp = tempfile::tempdir()?;
1336        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1337        let c = ValidateArgs {
1338            globs: vec![pattern],
1339            exclude: vec![],
1340            cache_dir: None,
1341            force_schema_fetch: true,
1342            force_validation: true,
1343            no_catalog: true,
1344            config_dir: None,
1345            schema_cache_ttl: None,
1346        };
1347        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1348        assert!(!result.has_errors());
1349        Ok(())
1350    }
1351
1352    #[tokio::test]
1353    async fn dir_all_valid() -> anyhow::Result<()> {
1354        let c = args_for_dirs(&["positive_tests"]);
1355        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1356        assert!(!result.has_errors());
1357        Ok(())
1358    }
1359
1360    #[tokio::test]
1361    async fn dir_all_invalid() -> anyhow::Result<()> {
1362        let c = args_for_dirs(&["negative_tests"]);
1363        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1364        assert!(result.has_errors());
1365        Ok(())
1366    }
1367
1368    #[tokio::test]
1369    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1370        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1371        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1372        assert!(result.has_errors());
1373        Ok(())
1374    }
1375
1376    #[tokio::test]
1377    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1378        let c = args_for_dirs(&["no_schema"]);
1379        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1380        assert!(!result.has_errors());
1381        Ok(())
1382    }
1383
1384    #[tokio::test]
1385    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1386        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1387        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1388        assert!(!result.has_errors());
1389        Ok(())
1390    }
1391
1392    // --- Directory as positional arg ---
1393
1394    #[tokio::test]
1395    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1396        let dir = testdata().join("positive_tests");
1397        let c = ValidateArgs {
1398            globs: vec![dir.to_string_lossy().to_string()],
1399            exclude: vec![],
1400            cache_dir: None,
1401            force_schema_fetch: true,
1402            force_validation: true,
1403            no_catalog: true,
1404            config_dir: None,
1405            schema_cache_ttl: None,
1406        };
1407        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1408        assert!(!result.has_errors());
1409        assert!(result.files_checked() > 0);
1410        Ok(())
1411    }
1412
1413    #[tokio::test]
1414    async fn multiple_directory_args() -> anyhow::Result<()> {
1415        let pos_dir = testdata().join("positive_tests");
1416        let no_schema_dir = testdata().join("no_schema");
1417        let c = ValidateArgs {
1418            globs: vec![
1419                pos_dir.to_string_lossy().to_string(),
1420                no_schema_dir.to_string_lossy().to_string(),
1421            ],
1422            exclude: vec![],
1423            cache_dir: None,
1424            force_schema_fetch: true,
1425            force_validation: true,
1426            no_catalog: true,
1427            config_dir: None,
1428            schema_cache_ttl: None,
1429        };
1430        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1431        assert!(!result.has_errors());
1432        Ok(())
1433    }
1434
1435    #[tokio::test]
1436    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1437        let dir = testdata().join("positive_tests");
1438        let glob_pattern = testdata()
1439            .join("no_schema")
1440            .join("*.json")
1441            .to_string_lossy()
1442            .to_string();
1443        let c = ValidateArgs {
1444            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1445            exclude: vec![],
1446            cache_dir: None,
1447            force_schema_fetch: true,
1448            force_validation: true,
1449            no_catalog: true,
1450            config_dir: None,
1451            schema_cache_ttl: None,
1452        };
1453        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1454        assert!(!result.has_errors());
1455        Ok(())
1456    }
1457
1458    #[tokio::test]
1459    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1460        let base = testdata().join("malformed");
1461        let c = ValidateArgs {
1462            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1463            exclude: vec![],
1464            cache_dir: None,
1465            force_schema_fetch: true,
1466            force_validation: true,
1467            no_catalog: true,
1468            config_dir: None,
1469            schema_cache_ttl: None,
1470        };
1471        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1472        assert!(result.has_errors());
1473        Ok(())
1474    }
1475
1476    #[tokio::test]
1477    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1478        let base = testdata().join("malformed");
1479        let c = ValidateArgs {
1480            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1481            exclude: vec![],
1482            cache_dir: None,
1483            force_schema_fetch: true,
1484            force_validation: true,
1485            no_catalog: true,
1486            config_dir: None,
1487            schema_cache_ttl: None,
1488        };
1489        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1490        assert!(result.has_errors());
1491        Ok(())
1492    }
1493
1494    // --- Exclude filter ---
1495
1496    #[tokio::test]
1497    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1498        let base = testdata().join("negative_tests");
1499        let c = ValidateArgs {
1500            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1501            exclude: vec![
1502                base.join("missing_name.json").to_string_lossy().to_string(),
1503                base.join("missing_name.toml").to_string_lossy().to_string(),
1504                base.join("missing_name.yaml").to_string_lossy().to_string(),
1505            ],
1506            cache_dir: None,
1507            force_schema_fetch: true,
1508            force_validation: true,
1509            no_catalog: true,
1510            config_dir: None,
1511            schema_cache_ttl: None,
1512        };
1513        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1514        assert!(!result.has_errors());
1515        Ok(())
1516    }
1517
1518    // --- Cache options ---
1519
1520    #[tokio::test]
1521    async fn custom_cache_dir() -> anyhow::Result<()> {
1522        let c = ValidateArgs {
1523            globs: scenario_globs(&["positive_tests"]),
1524            exclude: vec![],
1525            cache_dir: None,
1526            force_schema_fetch: true,
1527            force_validation: true,
1528            no_catalog: true,
1529            config_dir: None,
1530            schema_cache_ttl: None,
1531        };
1532        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1533        assert!(!result.has_errors());
1534        Ok(())
1535    }
1536
1537    // --- Local schema ---
1538
1539    #[tokio::test]
1540    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1541        let tmp = tempfile::tempdir()?;
1542        let schema_path = tmp.path().join("schema.json");
1543        fs::write(&schema_path, SCHEMA)?;
1544
1545        let f = tmp.path().join("valid.json");
1546        fs::write(
1547            &f,
1548            format!(
1549                r#"{{"$schema":"{}","name":"hello"}}"#,
1550                schema_path.to_string_lossy()
1551            ),
1552        )?;
1553
1554        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1555        let c = ValidateArgs {
1556            globs: vec![pattern],
1557            exclude: vec![],
1558            cache_dir: None,
1559            force_schema_fetch: true,
1560            force_validation: true,
1561            no_catalog: true,
1562            config_dir: None,
1563            schema_cache_ttl: None,
1564        };
1565        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1566        assert!(!result.has_errors());
1567        Ok(())
1568    }
1569
1570    #[tokio::test]
1571    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1572        let tmp = tempfile::tempdir()?;
1573        let schema_path = tmp.path().join("schema.json");
1574        fs::write(&schema_path, SCHEMA)?;
1575
1576        let f = tmp.path().join("valid.yaml");
1577        fs::write(
1578            &f,
1579            format!(
1580                "# yaml-language-server: $schema={}\nname: hello\n",
1581                schema_path.to_string_lossy()
1582            ),
1583        )?;
1584
1585        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1586        let c = ValidateArgs {
1587            globs: vec![pattern],
1588            exclude: vec![],
1589            cache_dir: None,
1590            force_schema_fetch: true,
1591            force_validation: true,
1592            no_catalog: true,
1593            config_dir: None,
1594            schema_cache_ttl: None,
1595        };
1596        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1597        assert!(!result.has_errors());
1598        Ok(())
1599    }
1600
1601    #[tokio::test]
1602    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1603        let tmp = tempfile::tempdir()?;
1604        let f = tmp.path().join("ref.json");
1605        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1606
1607        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1608        let c = ValidateArgs {
1609            globs: vec![pattern],
1610            exclude: vec![],
1611            cache_dir: None,
1612            force_schema_fetch: true,
1613            force_validation: true,
1614            no_catalog: true,
1615            config_dir: None,
1616            schema_cache_ttl: None,
1617        };
1618        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1619        assert!(result.has_errors());
1620        Ok(())
1621    }
1622
1623    // --- JSON5 / JSONC tests ---
1624
1625    #[tokio::test]
1626    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1627        let tmp = tempfile::tempdir()?;
1628        let schema_path = tmp.path().join("schema.json");
1629        fs::write(&schema_path, SCHEMA)?;
1630
1631        let f = tmp.path().join("config.json5");
1632        fs::write(
1633            &f,
1634            format!(
1635                r#"{{
1636  // JSON5 comment
1637  "$schema": "{}",
1638  name: "hello",
1639}}"#,
1640                schema_path.to_string_lossy()
1641            ),
1642        )?;
1643
1644        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1645        let c = ValidateArgs {
1646            globs: vec![pattern],
1647            exclude: vec![],
1648            cache_dir: None,
1649            force_schema_fetch: true,
1650            force_validation: true,
1651            no_catalog: true,
1652            config_dir: None,
1653            schema_cache_ttl: None,
1654        };
1655        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1656        assert!(!result.has_errors());
1657        Ok(())
1658    }
1659
1660    #[tokio::test]
1661    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1662        let tmp = tempfile::tempdir()?;
1663        let schema_path = tmp.path().join("schema.json");
1664        fs::write(&schema_path, SCHEMA)?;
1665
1666        let f = tmp.path().join("config.jsonc");
1667        fs::write(
1668            &f,
1669            format!(
1670                r#"{{
1671  /* JSONC comment */
1672  "$schema": "{}",
1673  "name": "hello"
1674}}"#,
1675                schema_path.to_string_lossy()
1676            ),
1677        )?;
1678
1679        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1680        let c = ValidateArgs {
1681            globs: vec![pattern],
1682            exclude: vec![],
1683            cache_dir: None,
1684            force_schema_fetch: true,
1685            force_validation: true,
1686            no_catalog: true,
1687            config_dir: None,
1688            schema_cache_ttl: None,
1689        };
1690        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1691        assert!(!result.has_errors());
1692        Ok(())
1693    }
1694
1695    // --- Catalog-based schema matching ---
1696
    /// Minimal stand-in for the GitHub workflow schema, served by the mock
    /// client in the catalog tests: an object with an optional string `name`,
    /// an unconstrained `on`, and an object `jobs`; `on` and `jobs` are required.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1706
1707    fn gh_catalog_json() -> String {
1708        r#"{"version":1,"schemas":[{
1709            "name": "GitHub Workflow",
1710            "description": "GitHub Actions workflow",
1711            "url": "https://www.schemastore.org/github-workflow.json",
1712            "fileMatch": [
1713                "**/.github/workflows/*.yml",
1714                "**/.github/workflows/*.yaml"
1715            ]
1716        }]}"#
1717            .to_string()
1718    }
1719
1720    #[tokio::test]
1721    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1722        let tmp = tempfile::tempdir()?;
1723        let cache_tmp = tempfile::tempdir()?;
1724        let wf_dir = tmp.path().join(".github/workflows");
1725        fs::create_dir_all(&wf_dir)?;
1726        fs::write(
1727            wf_dir.join("ci.yml"),
1728            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1729        )?;
1730
1731        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1732        let client = mock(&[
1733            (
1734                "https://www.schemastore.org/api/json/catalog.json",
1735                &gh_catalog_json(),
1736            ),
1737            (
1738                "https://www.schemastore.org/github-workflow.json",
1739                GH_WORKFLOW_SCHEMA,
1740            ),
1741        ]);
1742        let c = ValidateArgs {
1743            globs: vec![pattern],
1744            exclude: vec![],
1745            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1746            force_schema_fetch: true,
1747            force_validation: true,
1748            no_catalog: false,
1749            config_dir: None,
1750            schema_cache_ttl: None,
1751        };
1752        let result = run_with(&c, Some(client), |_| {}).await?;
1753        assert!(!result.has_errors());
1754        Ok(())
1755    }
1756
1757    #[tokio::test]
1758    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1759        let tmp = tempfile::tempdir()?;
1760        let cache_tmp = tempfile::tempdir()?;
1761        let wf_dir = tmp.path().join(".github/workflows");
1762        fs::create_dir_all(&wf_dir)?;
1763        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1764
1765        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1766        let client = mock(&[
1767            (
1768                "https://www.schemastore.org/api/json/catalog.json",
1769                &gh_catalog_json(),
1770            ),
1771            (
1772                "https://www.schemastore.org/github-workflow.json",
1773                GH_WORKFLOW_SCHEMA,
1774            ),
1775        ]);
1776        let c = ValidateArgs {
1777            globs: vec![pattern],
1778            exclude: vec![],
1779            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1780            force_schema_fetch: true,
1781            force_validation: true,
1782            no_catalog: false,
1783            config_dir: None,
1784            schema_cache_ttl: None,
1785        };
1786        let result = run_with(&c, Some(client), |_| {}).await?;
1787        assert!(result.has_errors());
1788        Ok(())
1789    }
1790
1791    #[tokio::test]
1792    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1793        let tmp = tempfile::tempdir()?;
1794        let cache_tmp = tempfile::tempdir()?;
1795        let wf_dir = tmp.path().join(".github/workflows");
1796        fs::create_dir_all(&wf_dir)?;
1797        fs::write(
1798            wf_dir.join("ci.yml"),
1799            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1800        )?;
1801
1802        let client = mock(&[
1803            (
1804                "https://www.schemastore.org/api/json/catalog.json",
1805                &gh_catalog_json(),
1806            ),
1807            (
1808                "https://www.schemastore.org/github-workflow.json",
1809                GH_WORKFLOW_SCHEMA,
1810            ),
1811        ]);
1812        let c = ValidateArgs {
1813            globs: vec![],
1814            exclude: vec![],
1815            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1816            force_schema_fetch: true,
1817            force_validation: true,
1818            no_catalog: false,
1819            config_dir: None,
1820            schema_cache_ttl: None,
1821        };
1822
1823        let orig_dir = std::env::current_dir()?;
1824        std::env::set_current_dir(tmp.path())?;
1825        let result = run_with(&c, Some(client), |_| {}).await?;
1826        std::env::set_current_dir(orig_dir)?;
1827
1828        assert!(!result.has_errors());
1829        Ok(())
1830    }
1831
1832    // --- TOML tests ---
1833
1834    #[tokio::test]
1835    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1836        let tmp = tempfile::tempdir()?;
1837        let schema_path = tmp.path().join("schema.json");
1838        fs::write(&schema_path, SCHEMA)?;
1839
1840        let f = tmp.path().join("config.toml");
1841        fs::write(
1842            &f,
1843            format!(
1844                "# :schema {}\nname = \"hello\"\n",
1845                schema_path.to_string_lossy()
1846            ),
1847        )?;
1848
1849        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1850        let c = ValidateArgs {
1851            globs: vec![pattern],
1852            exclude: vec![],
1853            cache_dir: None,
1854            force_schema_fetch: true,
1855            force_validation: true,
1856            no_catalog: true,
1857            config_dir: None,
1858            schema_cache_ttl: None,
1859        };
1860        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1861        assert!(!result.has_errors());
1862        Ok(())
1863    }
1864
1865    // --- Rewrite rules + // resolution ---
1866
1867    #[tokio::test]
1868    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1869        let tmp = tempfile::tempdir()?;
1870
1871        let schemas_dir = tmp.path().join("schemas");
1872        fs::create_dir_all(&schemas_dir)?;
1873        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1874
1875        fs::write(
1876            tmp.path().join("lintel.toml"),
1877            r#"
1878[rewrite]
1879"http://localhost:9000/" = "//schemas/"
1880"#,
1881        )?;
1882
1883        let f = tmp.path().join("config.json");
1884        fs::write(
1885            &f,
1886            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1887        )?;
1888
1889        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1890        let c = ValidateArgs {
1891            globs: vec![pattern],
1892            exclude: vec![],
1893            cache_dir: None,
1894            force_schema_fetch: true,
1895            force_validation: true,
1896            no_catalog: true,
1897            config_dir: Some(tmp.path().to_path_buf()),
1898            schema_cache_ttl: None,
1899        };
1900
1901        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1902        assert!(!result.has_errors());
1903        assert_eq!(result.files_checked(), 1);
1904        Ok(())
1905    }
1906
1907    #[tokio::test]
1908    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1909        let tmp = tempfile::tempdir()?;
1910
1911        let schemas_dir = tmp.path().join("schemas");
1912        fs::create_dir_all(&schemas_dir)?;
1913        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1914
1915        fs::write(tmp.path().join("lintel.toml"), "")?;
1916
1917        let sub = tmp.path().join("deeply/nested");
1918        fs::create_dir_all(&sub)?;
1919        let f = sub.join("config.json");
1920        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1921
1922        let pattern = sub.join("*.json").to_string_lossy().to_string();
1923        let c = ValidateArgs {
1924            globs: vec![pattern],
1925            exclude: vec![],
1926            cache_dir: None,
1927            force_schema_fetch: true,
1928            force_validation: true,
1929            no_catalog: true,
1930            config_dir: Some(tmp.path().to_path_buf()),
1931            schema_cache_ttl: None,
1932        };
1933
1934        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1935        assert!(!result.has_errors());
1936        Ok(())
1937    }
1938
1939    // --- Format validation override ---
1940
    /// Schema used by the format-validation tests: an object whose only
    /// declared property, `link`, is a string with `format: uri-reference`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1947
1948    #[tokio::test]
1949    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1950        let tmp = tempfile::tempdir()?;
1951        let schema_path = tmp.path().join("schema.json");
1952        fs::write(&schema_path, FORMAT_SCHEMA)?;
1953
1954        let f = tmp.path().join("data.json");
1955        fs::write(
1956            &f,
1957            format!(
1958                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1959                schema_path.to_string_lossy()
1960            ),
1961        )?;
1962
1963        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1964        let c = ValidateArgs {
1965            globs: vec![pattern],
1966            exclude: vec![],
1967            cache_dir: None,
1968            force_schema_fetch: true,
1969            force_validation: true,
1970            no_catalog: true,
1971            config_dir: Some(tmp.path().to_path_buf()),
1972            schema_cache_ttl: None,
1973        };
1974        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1975        assert!(
1976            result.has_errors(),
1977            "expected format error without override"
1978        );
1979        Ok(())
1980    }
1981
1982    #[tokio::test]
1983    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1984        let tmp = tempfile::tempdir()?;
1985        let schema_path = tmp.path().join("schema.json");
1986        fs::write(&schema_path, FORMAT_SCHEMA)?;
1987
1988        let f = tmp.path().join("data.json");
1989        fs::write(
1990            &f,
1991            format!(
1992                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1993                schema_path.to_string_lossy()
1994            ),
1995        )?;
1996
1997        // Use **/data.json to match the absolute path from the tempdir.
1998        fs::write(
1999            tmp.path().join("lintel.toml"),
2000            r#"
2001[[override]]
2002files = ["**/data.json"]
2003validate_formats = false
2004"#,
2005        )?;
2006
2007        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
2008        let c = ValidateArgs {
2009            globs: vec![pattern],
2010            exclude: vec![],
2011            cache_dir: None,
2012            force_schema_fetch: true,
2013            force_validation: true,
2014            no_catalog: true,
2015            config_dir: Some(tmp.path().to_path_buf()),
2016            schema_cache_ttl: None,
2017        };
2018        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2019        assert!(
2020            !result.has_errors(),
2021            "expected no errors with validate_formats = false override"
2022        );
2023        Ok(())
2024    }
2025
2026    // --- Unrecognized extension handling ---
2027
2028    #[tokio::test]
2029    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
2030        let tmp = tempfile::tempdir()?;
2031        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
2032
2033        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
2034        let c = ValidateArgs {
2035            globs: vec![pattern],
2036            exclude: vec![],
2037            cache_dir: None,
2038            force_schema_fetch: true,
2039            force_validation: true,
2040            no_catalog: true,
2041            config_dir: Some(tmp.path().to_path_buf()),
2042            schema_cache_ttl: None,
2043        };
2044        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2045        assert!(!result.has_errors());
2046        assert_eq!(result.files_checked(), 0);
2047        Ok(())
2048    }
2049
2050    #[tokio::test]
2051    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
2052        let tmp = tempfile::tempdir()?;
2053        let cache_tmp = tempfile::tempdir()?;
2054        // File has .cfg extension (unrecognized) but content is valid JSON
2055        fs::write(
2056            tmp.path().join("myapp.cfg"),
2057            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
2058        )?;
2059
2060        let catalog_json = r#"{"version":1,"schemas":[{
2061            "name": "MyApp Config",
2062            "description": "MyApp configuration",
2063            "url": "https://example.com/myapp.schema.json",
2064            "fileMatch": ["*.cfg"]
2065        }]}"#;
2066        let schema =
2067            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2068
2069        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2070        let client = mock(&[
2071            (
2072                "https://www.schemastore.org/api/json/catalog.json",
2073                catalog_json,
2074            ),
2075            ("https://example.com/myapp.schema.json", schema),
2076        ]);
2077        let c = ValidateArgs {
2078            globs: vec![pattern],
2079            exclude: vec![],
2080            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2081            force_schema_fetch: true,
2082            force_validation: true,
2083            no_catalog: false,
2084            config_dir: Some(tmp.path().to_path_buf()),
2085            schema_cache_ttl: None,
2086        };
2087        let result = run_with(&c, Some(client), |_| {}).await?;
2088        assert!(!result.has_errors());
2089        assert_eq!(result.files_checked(), 1);
2090        Ok(())
2091    }
2092
2093    #[tokio::test]
2094    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
2095        let tmp = tempfile::tempdir()?;
2096        let cache_tmp = tempfile::tempdir()?;
2097        // File matches catalog but content isn't parseable by any format
2098        fs::write(
2099            tmp.path().join("myapp.cfg"),
2100            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
2101        )?;
2102
2103        let catalog_json = r#"{"version":1,"schemas":[{
2104            "name": "MyApp Config",
2105            "description": "MyApp configuration",
2106            "url": "https://example.com/myapp.schema.json",
2107            "fileMatch": ["*.cfg"]
2108        }]}"#;
2109
2110        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2111        let client = mock(&[(
2112            "https://www.schemastore.org/api/json/catalog.json",
2113            catalog_json,
2114        )]);
2115        let c = ValidateArgs {
2116            globs: vec![pattern],
2117            exclude: vec![],
2118            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2119            force_schema_fetch: true,
2120            force_validation: true,
2121            no_catalog: false,
2122            config_dir: Some(tmp.path().to_path_buf()),
2123            schema_cache_ttl: None,
2124        };
2125        let result = run_with(&c, Some(client), |_| {}).await?;
2126        assert!(!result.has_errors());
2127        assert_eq!(result.files_checked(), 0);
2128        Ok(())
2129    }
2130
2131    #[tokio::test]
2132    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
2133        let tmp = tempfile::tempdir()?;
2134        let cache_tmp = tempfile::tempdir()?;
2135        // File has .cfg extension, content is valid JSON but fails schema validation
2136        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
2137
2138        let catalog_json = r#"{"version":1,"schemas":[{
2139            "name": "MyApp Config",
2140            "description": "MyApp configuration",
2141            "url": "https://example.com/myapp.schema.json",
2142            "fileMatch": ["*.cfg"]
2143        }]}"#;
2144        let schema =
2145            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2146
2147        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2148        let client = mock(&[
2149            (
2150                "https://www.schemastore.org/api/json/catalog.json",
2151                catalog_json,
2152            ),
2153            ("https://example.com/myapp.schema.json", schema),
2154        ]);
2155        let c = ValidateArgs {
2156            globs: vec![pattern],
2157            exclude: vec![],
2158            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2159            force_schema_fetch: true,
2160            force_validation: true,
2161            no_catalog: false,
2162            config_dir: Some(tmp.path().to_path_buf()),
2163            schema_cache_ttl: None,
2164        };
2165        let result = run_with(&c, Some(client), |_| {}).await?;
2166        assert!(result.has_errors());
2167        assert_eq!(result.files_checked(), 1);
2168        Ok(())
2169    }
2170
2171    // --- Validation cache ---
2172
2173    #[tokio::test]
2174    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
2175        let tmp = tempfile::tempdir()?;
2176        let schema_path = tmp.path().join("schema.json");
2177        fs::write(&schema_path, SCHEMA)?;
2178
2179        let f = tmp.path().join("valid.json");
2180        fs::write(
2181            &f,
2182            format!(
2183                r#"{{"$schema":"{}","name":"hello"}}"#,
2184                schema_path.to_string_lossy()
2185            ),
2186        )?;
2187
2188        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2189
2190        // First run: force_validation = false so results get cached
2191        let c = ValidateArgs {
2192            globs: vec![pattern.clone()],
2193            exclude: vec![],
2194            cache_dir: None,
2195            force_schema_fetch: true,
2196            force_validation: false,
2197            no_catalog: true,
2198            config_dir: None,
2199            schema_cache_ttl: None,
2200        };
2201        let mut first_statuses = Vec::new();
2202        let result = run_with(&c, Some(mock(&[])), |cf| {
2203            first_statuses.push(cf.validation_cache_status);
2204        })
2205        .await?;
2206        assert!(!result.has_errors());
2207        assert!(result.files_checked() > 0);
2208
2209        // Verify the first run recorded a validation cache miss
2210        assert!(
2211            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2212            "expected at least one validation cache miss on first run"
2213        );
2214
2215        // Second run: same file, same schema — should hit validation cache
2216        let mut second_statuses = Vec::new();
2217        let result = run_with(&c, Some(mock(&[])), |cf| {
2218            second_statuses.push(cf.validation_cache_status);
2219        })
2220        .await?;
2221        assert!(!result.has_errors());
2222
2223        // Verify the second run got a validation cache hit
2224        assert!(
2225            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2226            "expected at least one validation cache hit on second run"
2227        );
2228        Ok(())
2229    }
2230
2231    /// Schemas whose URI contains a fragment (e.g. `…/draft-07/schema#`)
2232    /// must compile without error — the fragment is stripped before being
2233    /// used as the base URI for `$ref` resolution.
2234    #[tokio::test]
2235    async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2236        let tmp = tempfile::tempdir()?;
2237
2238        // A minimal draft-07 schema whose `$schema` ends with `#`.
2239        let schema_body = r#"{
2240            "$schema": "http://json-schema.org/draft-07/schema#",
2241            "type": "object",
2242            "properties": { "name": { "type": "string" } },
2243            "required": ["name"]
2244        }"#;
2245
2246        let schema_url = "http://json-schema.org/draft-07/schema#";
2247
2248        let f = tmp.path().join("data.json");
2249        fs::write(
2250            &f,
2251            format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2252        )?;
2253
2254        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2255        let client = mock(&[(
2256            // The schema URI with fragment — exactly as the `$schema` value appears.
2257            schema_url,
2258            schema_body,
2259        )]);
2260        let c = ValidateArgs {
2261            globs: vec![pattern],
2262            exclude: vec![],
2263            cache_dir: None,
2264            force_schema_fetch: true,
2265            force_validation: true,
2266            no_catalog: true,
2267            config_dir: None,
2268            schema_cache_ttl: None,
2269        };
2270        let result = run_with(&c, Some(client), |_| {}).await?;
2271        assert!(
2272            !result.has_errors(),
2273            "schema URI with fragment should not cause compilation error"
2274        );
2275        assert_eq!(result.files_checked(), 1);
2276        Ok(())
2277    }
2278
2279    #[tokio::test]
2280    async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2281        let tmp = tempfile::tempdir()?;
2282
2283        // Referenced schema with a "name" string definition
2284        std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2285
2286        // Main schema that uses a relative $ref
2287        let schema_path = tmp.path().join("schema.json");
2288        std::fs::write(
2289            &schema_path,
2290            r#"{
2291                "type": "object",
2292                "properties": {
2293                    "name": { "$ref": "./defs.json" }
2294                },
2295                "required": ["name"]
2296            }"#,
2297        )?;
2298
2299        // Valid data file pointing to the local schema
2300        let schema_uri = schema_path.to_string_lossy();
2301        std::fs::write(
2302            tmp.path().join("data.json"),
2303            format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2304        )?;
2305
2306        // Invalid data file (name should be a string per defs.json)
2307        std::fs::write(
2308            tmp.path().join("bad.json"),
2309            format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2310        )?;
2311
2312        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2313        let args = ValidateArgs {
2314            globs: vec![pattern],
2315            exclude: vec![],
2316            cache_dir: None,
2317            force_schema_fetch: true,
2318            force_validation: true,
2319            no_catalog: true,
2320            config_dir: None,
2321            schema_cache_ttl: None,
2322        };
2323        let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2324
2325        // The invalid file should produce an error (name is 42, not a string)
2326        assert!(result.has_errors());
2327        // Exactly one file should have errors (bad.json), the other (data.json) should pass
2328        assert_eq!(result.errors.len(), 1);
2329        Ok(())
2330    }
2331}