Skip to main content

lintel_check/
validate.rs

1use std::collections::BTreeMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use glob::glob;
7use serde_json::Value;
8
9use crate::catalog::{self, CompiledCatalog};
10use crate::config;
11use crate::diagnostics::{
12    find_instance_path_offset, FileDiagnostic, ParseDiagnostic, ValidationDiagnostic,
13};
14use crate::discover;
15use crate::parsers::{self, FileFormat, JsoncParser, Parser};
16use crate::retriever::{default_cache_dir, CacheStatus, HttpClient, SchemaCache};
17
18pub struct ValidateArgs {
19    /// Glob patterns to find files (empty = auto-discover)
20    pub globs: Vec<String>,
21
22    /// Exclude files matching these globs (repeatable)
23    pub exclude: Vec<String>,
24
25    /// Cache directory for remote schemas
26    pub cache_dir: Option<String>,
27
28    /// Disable schema caching
29    pub no_cache: bool,
30
31    /// Disable SchemaStore catalog matching
32    pub no_catalog: bool,
33
34    /// Force file format for all inputs
35    pub format: Option<parsers::FileFormat>,
36
37    /// Directory to search for `lintel.toml` (defaults to cwd)
38    pub config_dir: Option<PathBuf>,
39}
40
41/// A single lint error produced during validation.
42pub enum LintError {
43    Parse(ParseDiagnostic),
44    Validation(ValidationDiagnostic),
45    File(FileDiagnostic),
46}
47
48impl LintError {
49    /// File path associated with this error.
50    pub fn path(&self) -> &str {
51        match self {
52            LintError::Parse(d) => d.src.name(),
53            LintError::Validation(d) => &d.path,
54            LintError::File(d) => &d.path,
55        }
56    }
57
58    /// Human-readable error message.
59    pub fn message(&self) -> &str {
60        match self {
61            LintError::Parse(d) => &d.message,
62            LintError::Validation(d) => &d.message,
63            LintError::File(d) => &d.message,
64        }
65    }
66
67    /// Byte offset in the source file (for sorting).
68    fn offset(&self) -> usize {
69        match self {
70            LintError::Parse(d) => d.span.offset(),
71            LintError::Validation(d) => d.span.offset(),
72            LintError::File(_) => 0,
73        }
74    }
75
76    /// Convert into a boxed miette Diagnostic for rich rendering.
77    pub fn into_diagnostic(self) -> Box<dyn miette::Diagnostic + Send + Sync> {
78        match self {
79            LintError::Parse(d) => Box::new(d),
80            LintError::Validation(d) => Box::new(d),
81            LintError::File(d) => Box::new(d),
82        }
83    }
84}
85
86/// A file that was checked and the schema it resolved to.
87pub struct CheckedFile {
88    pub path: String,
89    pub schema: String,
90    /// `None` for local schemas and builtins; `Some` for remote schemas.
91    pub cache_status: Option<CacheStatus>,
92}
93
94/// Result of a validation run.
95pub struct ValidateResult {
96    pub errors: Vec<LintError>,
97    pub checked: Vec<CheckedFile>,
98}
99
100impl ValidateResult {
101    pub fn has_errors(&self) -> bool {
102        !self.errors.is_empty()
103    }
104
105    pub fn files_checked(&self) -> usize {
106        self.checked.len()
107    }
108}
109
110// ---------------------------------------------------------------------------
111// Internal types
112// ---------------------------------------------------------------------------
113
114/// A file that has been parsed and matched to a schema URI.
115struct ParsedFile {
116    path: String,
117    content: String,
118    instance: Value,
119    /// Original schema URI before rewrites (for override matching).
120    original_schema_uri: String,
121}
122
123// ---------------------------------------------------------------------------
124// Config loading
125// ---------------------------------------------------------------------------
126
127/// Locate `lintel.toml`, load the full config, and return the config directory.
128/// Returns `(config, config_dir, config_path)`.  When no config is found or
129/// cwd is unavailable the config is default and `config_path` is `None`.
130fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
131    let start_dir = match search_dir {
132        Some(d) => d.to_path_buf(),
133        None => match std::env::current_dir() {
134            Ok(d) => d,
135            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
136        },
137    };
138
139    let Some(config_path) = config::find_config_path(&start_dir) else {
140        return (config::Config::default(), start_dir, None);
141    };
142
143    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
144    let cfg = config::find_and_load(&start_dir)
145        .ok()
146        .flatten()
147        .unwrap_or_default();
148    (cfg, dir, Some(config_path))
149}
150
151// ---------------------------------------------------------------------------
152// File collection
153// ---------------------------------------------------------------------------
154
155/// Collect input files from globs/directories, applying exclude filters.
156fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
157    if globs.is_empty() {
158        return discover::discover_files(".", exclude);
159    }
160
161    let mut result = Vec::new();
162    for pattern in globs {
163        let path = Path::new(pattern);
164        if path.is_dir() {
165            result.extend(discover::discover_files(pattern, exclude)?);
166        } else {
167            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
168                let path = entry?;
169                if path.is_file() && !is_excluded(&path, exclude) {
170                    result.push(path);
171                }
172            }
173        }
174    }
175    Ok(result)
176}
177
178fn is_excluded(path: &Path, excludes: &[String]) -> bool {
179    let path_str = match path.to_str() {
180        Some(s) => s.strip_prefix("./").unwrap_or(s),
181        None => return false,
182    };
183    excludes
184        .iter()
185        .any(|pattern| glob_match::glob_match(pattern, path_str))
186}
187
188// ---------------------------------------------------------------------------
189// lintel.toml self-validation
190// ---------------------------------------------------------------------------
191
192/// Validate `lintel.toml` against its built-in schema.
193fn validate_config(
194    config_path: &Path,
195    errors: &mut Vec<LintError>,
196    checked: &mut Vec<CheckedFile>,
197    on_check: &mut impl FnMut(&CheckedFile),
198) -> Result<()> {
199    let content = fs::read_to_string(config_path)?;
200    let config_value: Value = toml::from_str(&content)
201        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
202    let schema_value = config::schema();
203    if let Ok(validator) = jsonschema::options().build(&schema_value) {
204        let path_str = config_path.display().to_string();
205        for error in validator.iter_errors(&config_value) {
206            let ip = error.instance_path().to_string();
207            let offset = find_instance_path_offset(&content, &ip);
208            errors.push(LintError::Validation(ValidationDiagnostic {
209                src: miette::NamedSource::new(&path_str, content.clone()),
210                span: offset.into(),
211                path: path_str.clone(),
212                instance_path: ip,
213                message: error.to_string(),
214            }));
215        }
216        let cf = CheckedFile {
217            path: path_str,
218            schema: "(builtin)".to_string(),
219            cache_status: None,
220        };
221        on_check(&cf);
222        checked.push(cf);
223    }
224    Ok(())
225}
226
227// ---------------------------------------------------------------------------
228// Phase 1: Parse files and resolve schema URIs
229// ---------------------------------------------------------------------------
230
231/// Parse each file, extract its schema URI, apply rewrites, and group by
232/// resolved schema URI.
233fn parse_and_group_files(
234    files: &[PathBuf],
235    args: &ValidateArgs,
236    config: &config::Config,
237    config_dir: &Path,
238    compiled_catalogs: &[CompiledCatalog],
239    errors: &mut Vec<LintError>,
240) -> BTreeMap<String, Vec<ParsedFile>> {
241    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
242
243    for path in files {
244        let content = match fs::read_to_string(path) {
245            Ok(c) => c,
246            Err(e) => {
247                errors.push(LintError::File(FileDiagnostic {
248                    path: path.display().to_string(),
249                    message: format!("failed to read: {e}"),
250                }));
251                continue;
252            }
253        };
254
255        let mut format = args.format.unwrap_or_else(|| parsers::detect_format(path));
256        let mut parser = parsers::parser_for(format);
257        let path_str = path.display().to_string();
258        let file_name = path
259            .file_name()
260            .and_then(|n| n.to_str())
261            .unwrap_or(&path_str);
262
263        // Parse with detected format, falling back to JSONC for catalog-matched .json files.
264        let instance = match parser.parse(&content, &path_str) {
265            Ok(val) => val,
266            Err(parse_err) => {
267                // Only attempt JSONC fallback for .json files that match a catalog entry.
268                let should_try_jsonc = format == FileFormat::Json
269                    && compiled_catalogs
270                        .iter()
271                        .any(|cat| cat.find_schema(&path_str, file_name).is_some());
272
273                if !should_try_jsonc {
274                    errors.push(LintError::Parse(parse_err));
275                    continue;
276                }
277
278                match JsoncParser.parse(&content, &path_str) {
279                    Ok(val) => {
280                        format = FileFormat::Jsonc;
281                        parser = parsers::parser_for(format);
282                        val
283                    }
284                    Err(jsonc_err) => {
285                        errors.push(LintError::Parse(jsonc_err));
286                        continue;
287                    }
288                }
289            }
290        };
291
292        // Skip markdown files with no frontmatter
293        if instance.is_null() {
294            continue;
295        }
296
297        // Schema resolution priority:
298        // 1. Inline $schema / YAML modeline (always wins)
299        // 2. Custom schema mappings from lintel.toml [schemas]
300        // 3. Catalog matching (SchemaStore + additional registries)
301        let schema_uri = parser
302            .extract_schema_uri(&content, &instance)
303            .or_else(|| {
304                config
305                    .find_schema_mapping(&path_str, file_name)
306                    .map(str::to_string)
307            })
308            .or_else(|| {
309                compiled_catalogs
310                    .iter()
311                    .find_map(|cat| cat.find_schema(&path_str, file_name))
312                    .map(str::to_string)
313            });
314        let Some(schema_uri) = schema_uri else {
315            continue;
316        };
317
318        // Keep original URI for override matching (before rewrites)
319        let original_schema_uri = schema_uri.clone();
320
321        // Apply rewrite rules, then resolve // paths relative to lintel.toml
322        let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
323        let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
324
325        // Resolve relative local paths against the file's parent directory.
326        let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
327        let schema_uri = if !is_remote {
328            path.parent()
329                .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
330                .unwrap_or(schema_uri)
331        } else {
332            schema_uri
333        };
334
335        schema_groups
336            .entry(schema_uri)
337            .or_default()
338            .push(ParsedFile {
339                path: path_str,
340                content,
341                instance,
342                original_schema_uri,
343            });
344    }
345
346    schema_groups
347}
348
349// ---------------------------------------------------------------------------
350// Phase 2: Schema fetching, compilation, and instance validation
351// ---------------------------------------------------------------------------
352
353/// Fetch a schema by URI, returning its parsed JSON and cache status.
354fn fetch_schema<C: HttpClient>(
355    schema_uri: &str,
356    retriever: &SchemaCache<C>,
357    group: &[ParsedFile],
358    errors: &mut Vec<LintError>,
359    checked: &mut Vec<CheckedFile>,
360    on_check: &mut impl FnMut(&CheckedFile),
361) -> Option<(Value, Option<CacheStatus>)> {
362    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
363
364    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
365        retriever
366            .fetch(schema_uri)
367            .map(|(v, status)| (v, Some(status)))
368            .map_err(|e| format!("failed to fetch schema: {schema_uri}: {e}"))
369    } else {
370        fs::read_to_string(schema_uri)
371            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
372            .and_then(|content| {
373                serde_json::from_str::<Value>(&content)
374                    .map(|v| (v, None))
375                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
376            })
377    };
378
379    match result {
380        Ok(value) => Some(value),
381        Err(message) => {
382            report_group_error(&message, schema_uri, None, group, errors, checked, on_check);
383            None
384        }
385    }
386}
387
388/// Report the same error for every file in a schema group.
389fn report_group_error(
390    message: &str,
391    schema_uri: &str,
392    cache_status: Option<CacheStatus>,
393    group: &[ParsedFile],
394    errors: &mut Vec<LintError>,
395    checked: &mut Vec<CheckedFile>,
396    on_check: &mut impl FnMut(&CheckedFile),
397) {
398    for pf in group {
399        let cf = CheckedFile {
400            path: pf.path.clone(),
401            schema: schema_uri.to_string(),
402            cache_status,
403        };
404        on_check(&cf);
405        checked.push(cf);
406        errors.push(LintError::File(FileDiagnostic {
407            path: pf.path.clone(),
408            message: message.to_string(),
409        }));
410    }
411}
412
413/// Mark every file in a group as checked (no errors).
414fn mark_group_checked(
415    schema_uri: &str,
416    cache_status: Option<CacheStatus>,
417    group: &[ParsedFile],
418    checked: &mut Vec<CheckedFile>,
419    on_check: &mut impl FnMut(&CheckedFile),
420) {
421    for pf in group {
422        let cf = CheckedFile {
423            path: pf.path.clone(),
424            schema: schema_uri.to_string(),
425            cache_status,
426        };
427        on_check(&cf);
428        checked.push(cf);
429    }
430}
431
432/// Validate all files in a group against an already-compiled validator.
433fn validate_group(
434    validator: &jsonschema::Validator,
435    schema_uri: &str,
436    cache_status: Option<CacheStatus>,
437    group: &[ParsedFile],
438    errors: &mut Vec<LintError>,
439    checked: &mut Vec<CheckedFile>,
440    on_check: &mut impl FnMut(&CheckedFile),
441) {
442    for pf in group {
443        let cf = CheckedFile {
444            path: pf.path.clone(),
445            schema: schema_uri.to_string(),
446            cache_status,
447        };
448        on_check(&cf);
449        checked.push(cf);
450
451        for error in validator.iter_errors(&pf.instance) {
452            let ip = error.instance_path().to_string();
453            let offset = find_instance_path_offset(&pf.content, &ip);
454            errors.push(LintError::Validation(ValidationDiagnostic {
455                src: miette::NamedSource::new(&pf.path, pf.content.clone()),
456                span: offset.into(),
457                path: pf.path.clone(),
458                instance_path: ip,
459                message: error.to_string(),
460            }));
461        }
462    }
463}
464
465// ---------------------------------------------------------------------------
466// Public API
467// ---------------------------------------------------------------------------
468
469pub async fn run<C: HttpClient>(args: &ValidateArgs, client: C) -> Result<ValidateResult> {
470    run_with(args, client, |_| {}).await
471}
472
473/// Like [`run`], but calls `on_check` each time a file is checked, allowing
474/// callers to stream progress (e.g. verbose output) as files are processed.
475pub async fn run_with<C: HttpClient>(
476    args: &ValidateArgs,
477    client: C,
478    mut on_check: impl FnMut(&CheckedFile),
479) -> Result<ValidateResult> {
480    let cache_dir = if args.no_cache {
481        None
482    } else {
483        Some(
484            args.cache_dir
485                .as_ref()
486                .map(PathBuf::from)
487                .unwrap_or_else(default_cache_dir),
488        )
489    };
490    let retriever = SchemaCache::new(cache_dir, client.clone());
491
492    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
493    let files = collect_files(&args.globs, &args.exclude)?;
494
495    let mut compiled_catalogs = Vec::new();
496
497    if !args.no_catalog {
498        // Default Lintel catalog (github:lintel-rs/catalog)
499        match catalog::fetch_registry(&retriever, catalog::DEFAULT_REGISTRY) {
500            Ok(cat) => compiled_catalogs.push(CompiledCatalog::compile(&cat)),
501            Err(e) => {
502                eprintln!(
503                    "warning: failed to fetch default catalog {}: {e}",
504                    catalog::DEFAULT_REGISTRY
505                );
506            }
507        }
508        // SchemaStore catalog
509        match catalog::fetch_catalog(&retriever) {
510            Ok(cat) => compiled_catalogs.push(CompiledCatalog::compile(&cat)),
511            Err(e) => {
512                eprintln!("warning: failed to fetch SchemaStore catalog: {e}");
513            }
514        }
515        // Additional registries from lintel.toml
516        for registry_url in &config.registries {
517            match catalog::fetch_registry(&retriever, registry_url) {
518                Ok(cat) => compiled_catalogs.push(CompiledCatalog::compile(&cat)),
519                Err(e) => {
520                    eprintln!("warning: failed to fetch registry {registry_url}: {e}");
521                }
522            }
523        }
524    }
525
526    let mut errors: Vec<LintError> = Vec::new();
527    let mut checked: Vec<CheckedFile> = Vec::new();
528
529    // Validate lintel.toml against its own schema
530    if let Some(config_path) = config_path {
531        validate_config(&config_path, &mut errors, &mut checked, &mut on_check)?;
532    }
533
534    // Phase 1: Parse files and resolve schema URIs
535    let schema_groups = parse_and_group_files(
536        &files,
537        args,
538        &config,
539        &config_dir,
540        &compiled_catalogs,
541        &mut errors,
542    );
543
544    // Phase 2: Compile each schema once and validate all matching files
545    for (schema_uri, group) in &schema_groups {
546        let Some((schema_value, cache_status)) = fetch_schema(
547            schema_uri,
548            &retriever,
549            group,
550            &mut errors,
551            &mut checked,
552            &mut on_check,
553        ) else {
554            continue;
555        };
556
557        // If ANY file in the group matches a `validate_formats = false` override,
558        // disable format validation for the whole group (they share one compiled validator).
559        let validate_formats = group.iter().all(|pf| {
560            config
561                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
562        });
563
564        let validator = match jsonschema::async_options()
565            .with_retriever(retriever.clone())
566            .should_validate_formats(validate_formats)
567            .build(&schema_value)
568            .await
569        {
570            Ok(v) => v,
571            Err(e) => {
572                // When format validation is disabled and the compilation error
573                // is a uri-reference issue (e.g. Rust-style $ref paths in
574                // vector.json), skip validation silently.
575                if !validate_formats && e.to_string().contains("uri-reference") {
576                    mark_group_checked(
577                        schema_uri,
578                        cache_status,
579                        group,
580                        &mut checked,
581                        &mut on_check,
582                    );
583                    continue;
584                }
585                report_group_error(
586                    &format!("failed to compile schema: {e}"),
587                    schema_uri,
588                    cache_status,
589                    group,
590                    &mut errors,
591                    &mut checked,
592                    &mut on_check,
593                );
594                continue;
595            }
596        };
597
598        validate_group(
599            &validator,
600            schema_uri,
601            cache_status,
602            group,
603            &mut errors,
604            &mut checked,
605            &mut on_check,
606        );
607    }
608
609    // Sort errors for deterministic output (by path, then by span offset)
610    errors.sort_by(|a, b| {
611        a.path()
612            .cmp(b.path())
613            .then_with(|| a.offset().cmp(&b.offset()))
614    });
615
616    Ok(ValidateResult { errors, checked })
617}
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622    use crate::retriever::HttpClient;
623    use std::collections::HashMap;
624    use std::error::Error;
625    use std::path::Path;
626
627    #[derive(Clone)]
628    struct MockClient(HashMap<String, String>);
629
630    impl HttpClient for MockClient {
631        fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
632            self.0
633                .get(uri)
634                .cloned()
635                .ok_or_else(|| format!("mock: no response for {uri}").into())
636        }
637    }
638
639    fn mock(entries: &[(&str, &str)]) -> MockClient {
640        MockClient(
641            entries
642                .iter()
643                .map(|(k, v)| (k.to_string(), v.to_string()))
644                .collect(),
645        )
646    }
647
648    fn testdata() -> PathBuf {
649        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
650    }
651
652    /// Build glob patterns that scan one or more testdata directories for all supported file types.
653    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
654        dirs.iter()
655            .flat_map(|dir| {
656                let base = testdata().join(dir);
657                vec![
658                    base.join("*.json").to_string_lossy().to_string(),
659                    base.join("*.yaml").to_string_lossy().to_string(),
660                    base.join("*.yml").to_string_lossy().to_string(),
661                    base.join("*.json5").to_string_lossy().to_string(),
662                    base.join("*.jsonc").to_string_lossy().to_string(),
663                    base.join("*.toml").to_string_lossy().to_string(),
664                ]
665            })
666            .collect()
667    }
668
669    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
670        ValidateArgs {
671            globs: scenario_globs(dirs),
672            exclude: vec![],
673            cache_dir: None,
674            no_cache: true,
675            no_catalog: true,
676            format: None,
677            config_dir: None,
678        }
679    }
680
681    const SCHEMA: &str =
682        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
683
684    fn schema_mock() -> MockClient {
685        mock(&[("https://example.com/schema.json", SCHEMA)])
686    }
687
688    // --- Directory scanning tests ---
689
690    #[tokio::test]
691    async fn no_matching_files() {
692        let tmp = tempfile::tempdir().unwrap();
693        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
694        let c = ValidateArgs {
695            globs: vec![pattern],
696            exclude: vec![],
697            cache_dir: None,
698            no_cache: true,
699            no_catalog: true,
700            format: None,
701            config_dir: None,
702        };
703        let result = run(&c, mock(&[])).await.unwrap();
704        assert!(!result.has_errors());
705    }
706
707    #[tokio::test]
708    async fn dir_all_valid() {
709        let c = args_for_dirs(&["positive_tests"]);
710        let result = run(&c, schema_mock()).await.unwrap();
711        assert!(!result.has_errors());
712    }
713
714    #[tokio::test]
715    async fn dir_all_invalid() {
716        let c = args_for_dirs(&["negative_tests"]);
717        let result = run(&c, schema_mock()).await.unwrap();
718        assert!(result.has_errors());
719    }
720
721    #[tokio::test]
722    async fn dir_mixed_valid_and_invalid() {
723        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
724        let result = run(&c, schema_mock()).await.unwrap();
725        assert!(result.has_errors());
726    }
727
728    #[tokio::test]
729    async fn dir_no_schemas_skipped() {
730        let c = args_for_dirs(&["no_schema"]);
731        let result = run(&c, mock(&[])).await.unwrap();
732        assert!(!result.has_errors());
733    }
734
735    #[tokio::test]
736    async fn dir_valid_with_no_schema_files() {
737        let c = args_for_dirs(&["positive_tests", "no_schema"]);
738        let result = run(&c, schema_mock()).await.unwrap();
739        assert!(!result.has_errors());
740    }
741
742    // --- Directory as positional arg ---
743
744    #[tokio::test]
745    async fn directory_arg_discovers_files() {
746        let dir = testdata().join("positive_tests");
747        let c = ValidateArgs {
748            globs: vec![dir.to_string_lossy().to_string()],
749            exclude: vec![],
750            cache_dir: None,
751            no_cache: true,
752            no_catalog: true,
753            format: None,
754            config_dir: None,
755        };
756        let result = run(&c, schema_mock()).await.unwrap();
757        assert!(!result.has_errors());
758        assert!(result.files_checked() > 0);
759    }
760
761    #[tokio::test]
762    async fn multiple_directory_args() {
763        let pos_dir = testdata().join("positive_tests");
764        let no_schema_dir = testdata().join("no_schema");
765        let c = ValidateArgs {
766            globs: vec![
767                pos_dir.to_string_lossy().to_string(),
768                no_schema_dir.to_string_lossy().to_string(),
769            ],
770            exclude: vec![],
771            cache_dir: None,
772            no_cache: true,
773            no_catalog: true,
774            format: None,
775            config_dir: None,
776        };
777        let result = run(&c, schema_mock()).await.unwrap();
778        assert!(!result.has_errors());
779    }
780
781    #[tokio::test]
782    async fn mix_directory_and_glob_args() {
783        let dir = testdata().join("positive_tests");
784        let glob_pattern = testdata()
785            .join("no_schema")
786            .join("*.json")
787            .to_string_lossy()
788            .to_string();
789        let c = ValidateArgs {
790            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
791            exclude: vec![],
792            cache_dir: None,
793            no_cache: true,
794            no_catalog: true,
795            format: None,
796            config_dir: None,
797        };
798        let result = run(&c, schema_mock()).await.unwrap();
799        assert!(!result.has_errors());
800    }
801
802    #[tokio::test]
803    async fn malformed_json_parse_error() {
804        let base = testdata().join("malformed");
805        let c = ValidateArgs {
806            globs: vec![base.join("*.json").to_string_lossy().to_string()],
807            exclude: vec![],
808            cache_dir: None,
809            no_cache: true,
810            no_catalog: true,
811            format: None,
812            config_dir: None,
813        };
814        let result = run(&c, mock(&[])).await.unwrap();
815        assert!(result.has_errors());
816    }
817
818    #[tokio::test]
819    async fn malformed_yaml_parse_error() {
820        let base = testdata().join("malformed");
821        let c = ValidateArgs {
822            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
823            exclude: vec![],
824            cache_dir: None,
825            no_cache: true,
826            no_catalog: true,
827            format: None,
828            config_dir: None,
829        };
830        let result = run(&c, mock(&[])).await.unwrap();
831        assert!(result.has_errors());
832    }
833
834    // --- Exclude filter ---
835
836    #[tokio::test]
837    async fn exclude_filters_files_in_dir() {
838        let base = testdata().join("negative_tests");
839        let c = ValidateArgs {
840            globs: scenario_globs(&["positive_tests", "negative_tests"]),
841            exclude: vec![
842                base.join("missing_name.json").to_string_lossy().to_string(),
843                base.join("missing_name.toml").to_string_lossy().to_string(),
844                base.join("missing_name.yaml").to_string_lossy().to_string(),
845            ],
846            cache_dir: None,
847            no_cache: true,
848            no_catalog: true,
849            format: None,
850            config_dir: None,
851        };
852        let result = run(&c, schema_mock()).await.unwrap();
853        assert!(!result.has_errors());
854    }
855
856    // --- Cache options ---
857
858    #[tokio::test]
859    async fn custom_cache_dir() {
860        let cache_tmp = tempfile::tempdir().unwrap();
861        let c = ValidateArgs {
862            globs: scenario_globs(&["positive_tests"]),
863            exclude: vec![],
864            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
865            no_cache: false,
866            no_catalog: true,
867            format: None,
868            config_dir: None,
869        };
870        let result = run(&c, schema_mock()).await.unwrap();
871        assert!(!result.has_errors());
872
873        // Schema was fetched once and cached
874        let entries: Vec<_> = fs::read_dir(cache_tmp.path()).unwrap().collect();
875        assert_eq!(entries.len(), 1);
876    }
877
878    // --- Local schema ---
879
880    #[tokio::test]
881    async fn json_valid_with_local_schema() {
882        let tmp = tempfile::tempdir().unwrap();
883        let schema_path = tmp.path().join("schema.json");
884        fs::write(&schema_path, SCHEMA).unwrap();
885
886        let f = tmp.path().join("valid.json");
887        fs::write(
888            &f,
889            format!(
890                r#"{{"$schema":"{}","name":"hello"}}"#,
891                schema_path.to_string_lossy()
892            ),
893        )
894        .unwrap();
895
896        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
897        let c = ValidateArgs {
898            globs: vec![pattern],
899            exclude: vec![],
900            cache_dir: None,
901            no_cache: true,
902            no_catalog: true,
903            format: None,
904            config_dir: None,
905        };
906        let result = run(&c, mock(&[])).await.unwrap();
907        assert!(!result.has_errors());
908    }
909
910    #[tokio::test]
911    async fn yaml_valid_with_local_schema() {
912        let tmp = tempfile::tempdir().unwrap();
913        let schema_path = tmp.path().join("schema.json");
914        fs::write(&schema_path, SCHEMA).unwrap();
915
916        let f = tmp.path().join("valid.yaml");
917        fs::write(
918            &f,
919            format!(
920                "# yaml-language-server: $schema={}\nname: hello\n",
921                schema_path.to_string_lossy()
922            ),
923        )
924        .unwrap();
925
926        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
927        let c = ValidateArgs {
928            globs: vec![pattern],
929            exclude: vec![],
930            cache_dir: None,
931            no_cache: true,
932            no_catalog: true,
933            format: None,
934            config_dir: None,
935        };
936        let result = run(&c, mock(&[])).await.unwrap();
937        assert!(!result.has_errors());
938    }
939
940    #[tokio::test]
941    async fn missing_local_schema_errors() {
942        let tmp = tempfile::tempdir().unwrap();
943        let f = tmp.path().join("ref.json");
944        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#).unwrap();
945
946        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
947        let c = ValidateArgs {
948            globs: vec![pattern],
949            exclude: vec![],
950            cache_dir: None,
951            no_cache: true,
952            no_catalog: true,
953            format: None,
954            config_dir: None,
955        };
956        let result = run(&c, mock(&[])).await.unwrap();
957        assert!(result.has_errors());
958    }
959
960    // --- JSON5 / JSONC tests ---
961
962    #[tokio::test]
963    async fn json5_valid_with_schema() {
964        let tmp = tempfile::tempdir().unwrap();
965        let schema_path = tmp.path().join("schema.json");
966        fs::write(&schema_path, SCHEMA).unwrap();
967
968        let f = tmp.path().join("config.json5");
969        fs::write(
970            &f,
971            format!(
972                r#"{{
973  // JSON5 comment
974  "$schema": "{}",
975  name: "hello",
976}}"#,
977                schema_path.to_string_lossy()
978            ),
979        )
980        .unwrap();
981
982        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
983        let c = ValidateArgs {
984            globs: vec![pattern],
985            exclude: vec![],
986            cache_dir: None,
987            no_cache: true,
988            no_catalog: true,
989            format: None,
990            config_dir: None,
991        };
992        let result = run(&c, mock(&[])).await.unwrap();
993        assert!(!result.has_errors());
994    }
995
996    #[tokio::test]
997    async fn jsonc_valid_with_schema() {
998        let tmp = tempfile::tempdir().unwrap();
999        let schema_path = tmp.path().join("schema.json");
1000        fs::write(&schema_path, SCHEMA).unwrap();
1001
1002        let f = tmp.path().join("config.jsonc");
1003        fs::write(
1004            &f,
1005            format!(
1006                r#"{{
1007  /* JSONC comment */
1008  "$schema": "{}",
1009  "name": "hello"
1010}}"#,
1011                schema_path.to_string_lossy()
1012            ),
1013        )
1014        .unwrap();
1015
1016        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1017        let c = ValidateArgs {
1018            globs: vec![pattern],
1019            exclude: vec![],
1020            cache_dir: None,
1021            no_cache: true,
1022            no_catalog: true,
1023            format: None,
1024            config_dir: None,
1025        };
1026        let result = run(&c, mock(&[])).await.unwrap();
1027        assert!(!result.has_errors());
1028    }
1029
1030    // --- Catalog-based schema matching ---
1031
    /// Small JSON Schema used as the GitHub workflow schema in the catalog
    /// tests, where it is registered with `mock` under the
    /// `github-workflow.json` URL.
    ///
    /// It describes an object that requires `on` and `jobs`; `name` must be a
    /// string, `jobs` must be an object, and `on` is unconstrained.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1041
1042    fn gh_catalog_json() -> String {
1043        r#"{"schemas":[{
1044            "name": "GitHub Workflow",
1045            "url": "https://www.schemastore.org/github-workflow.json",
1046            "fileMatch": [
1047                "**/.github/workflows/*.yml",
1048                "**/.github/workflows/*.yaml"
1049            ]
1050        }]}"#
1051            .to_string()
1052    }
1053
1054    #[tokio::test]
1055    async fn catalog_matches_github_workflow_valid() {
1056        let tmp = tempfile::tempdir().unwrap();
1057        let wf_dir = tmp.path().join(".github/workflows");
1058        fs::create_dir_all(&wf_dir).unwrap();
1059        fs::write(
1060            wf_dir.join("ci.yml"),
1061            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1062        )
1063        .unwrap();
1064
1065        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1066        let client = mock(&[
1067            (
1068                "https://www.schemastore.org/api/json/catalog.json",
1069                &gh_catalog_json(),
1070            ),
1071            (
1072                "https://www.schemastore.org/github-workflow.json",
1073                GH_WORKFLOW_SCHEMA,
1074            ),
1075        ]);
1076        let c = ValidateArgs {
1077            globs: vec![pattern],
1078            exclude: vec![],
1079            cache_dir: None,
1080            no_cache: true,
1081            no_catalog: false,
1082            format: None,
1083            config_dir: None,
1084        };
1085        let result = run(&c, client).await.unwrap();
1086        assert!(!result.has_errors());
1087    }
1088
1089    #[tokio::test]
1090    async fn catalog_matches_github_workflow_invalid() {
1091        let tmp = tempfile::tempdir().unwrap();
1092        let wf_dir = tmp.path().join(".github/workflows");
1093        fs::create_dir_all(&wf_dir).unwrap();
1094        fs::write(wf_dir.join("bad.yml"), "name: Broken\n").unwrap();
1095
1096        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1097        let client = mock(&[
1098            (
1099                "https://www.schemastore.org/api/json/catalog.json",
1100                &gh_catalog_json(),
1101            ),
1102            (
1103                "https://www.schemastore.org/github-workflow.json",
1104                GH_WORKFLOW_SCHEMA,
1105            ),
1106        ]);
1107        let c = ValidateArgs {
1108            globs: vec![pattern],
1109            exclude: vec![],
1110            cache_dir: None,
1111            no_cache: true,
1112            no_catalog: false,
1113            format: None,
1114            config_dir: None,
1115        };
1116        let result = run(&c, client).await.unwrap();
1117        assert!(result.has_errors());
1118    }
1119
1120    #[tokio::test]
1121    async fn auto_discover_finds_github_workflows() {
1122        let tmp = tempfile::tempdir().unwrap();
1123        let wf_dir = tmp.path().join(".github/workflows");
1124        fs::create_dir_all(&wf_dir).unwrap();
1125        fs::write(
1126            wf_dir.join("ci.yml"),
1127            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1128        )
1129        .unwrap();
1130
1131        let client = mock(&[
1132            (
1133                "https://www.schemastore.org/api/json/catalog.json",
1134                &gh_catalog_json(),
1135            ),
1136            (
1137                "https://www.schemastore.org/github-workflow.json",
1138                GH_WORKFLOW_SCHEMA,
1139            ),
1140        ]);
1141        let c = ValidateArgs {
1142            globs: vec![],
1143            exclude: vec![],
1144            cache_dir: None,
1145            no_cache: true,
1146            no_catalog: false,
1147            format: None,
1148            config_dir: None,
1149        };
1150
1151        let orig_dir = std::env::current_dir().unwrap();
1152        std::env::set_current_dir(tmp.path()).unwrap();
1153        let result = run(&c, client).await.unwrap();
1154        std::env::set_current_dir(orig_dir).unwrap();
1155
1156        assert!(!result.has_errors());
1157    }
1158
1159    // --- TOML tests ---
1160
1161    #[tokio::test]
1162    async fn toml_valid_with_schema() {
1163        let tmp = tempfile::tempdir().unwrap();
1164        let schema_path = tmp.path().join("schema.json");
1165        fs::write(&schema_path, SCHEMA).unwrap();
1166
1167        let f = tmp.path().join("config.toml");
1168        fs::write(
1169            &f,
1170            format!(
1171                "# $schema: {}\nname = \"hello\"\n",
1172                schema_path.to_string_lossy()
1173            ),
1174        )
1175        .unwrap();
1176
1177        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1178        let c = ValidateArgs {
1179            globs: vec![pattern],
1180            exclude: vec![],
1181            cache_dir: None,
1182            no_cache: true,
1183            no_catalog: true,
1184            format: None,
1185            config_dir: None,
1186        };
1187        let result = run(&c, mock(&[])).await.unwrap();
1188        assert!(!result.has_errors());
1189    }
1190
1191    // --- Rewrite rules + // resolution ---
1192
1193    #[tokio::test]
1194    async fn rewrite_rule_with_double_slash_resolves_schema() {
1195        let tmp = tempfile::tempdir().unwrap();
1196
1197        let schemas_dir = tmp.path().join("schemas");
1198        fs::create_dir_all(&schemas_dir).unwrap();
1199        fs::write(&schemas_dir.join("test.json"), SCHEMA).unwrap();
1200
1201        fs::write(
1202            tmp.path().join("lintel.toml"),
1203            r#"
1204[rewrite]
1205"http://localhost:9000/" = "//schemas/"
1206"#,
1207        )
1208        .unwrap();
1209
1210        let f = tmp.path().join("config.json");
1211        fs::write(
1212            &f,
1213            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1214        )
1215        .unwrap();
1216
1217        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1218        let c = ValidateArgs {
1219            globs: vec![pattern],
1220            exclude: vec![],
1221            cache_dir: None,
1222            no_cache: true,
1223            no_catalog: true,
1224            format: None,
1225            config_dir: Some(tmp.path().to_path_buf()),
1226        };
1227
1228        let result = run(&c, mock(&[])).await.unwrap();
1229        assert!(!result.has_errors());
1230        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1231    }
1232
1233    #[tokio::test]
1234    async fn double_slash_schema_resolves_relative_to_config() {
1235        let tmp = tempfile::tempdir().unwrap();
1236
1237        let schemas_dir = tmp.path().join("schemas");
1238        fs::create_dir_all(&schemas_dir).unwrap();
1239        fs::write(&schemas_dir.join("test.json"), SCHEMA).unwrap();
1240
1241        fs::write(tmp.path().join("lintel.toml"), "").unwrap();
1242
1243        let sub = tmp.path().join("deeply/nested");
1244        fs::create_dir_all(&sub).unwrap();
1245        let f = sub.join("config.json");
1246        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#).unwrap();
1247
1248        let pattern = sub.join("*.json").to_string_lossy().to_string();
1249        let c = ValidateArgs {
1250            globs: vec![pattern],
1251            exclude: vec![],
1252            cache_dir: None,
1253            no_cache: true,
1254            no_catalog: true,
1255            format: None,
1256            config_dir: Some(tmp.path().to_path_buf()),
1257        };
1258
1259        let result = run(&c, mock(&[])).await.unwrap();
1260        assert!(!result.has_errors());
1261    }
1262
1263    // --- Format validation override ---
1264
    /// JSON Schema used by the format-override tests: an object whose
    /// optional `link` property is a string with `format: uri-reference`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1271
1272    #[tokio::test]
1273    async fn format_errors_reported_without_override() {
1274        let tmp = tempfile::tempdir().unwrap();
1275        let schema_path = tmp.path().join("schema.json");
1276        fs::write(&schema_path, FORMAT_SCHEMA).unwrap();
1277
1278        let f = tmp.path().join("data.json");
1279        fs::write(
1280            &f,
1281            format!(
1282                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1283                schema_path.to_string_lossy()
1284            ),
1285        )
1286        .unwrap();
1287
1288        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1289        let c = ValidateArgs {
1290            globs: vec![pattern],
1291            exclude: vec![],
1292            cache_dir: None,
1293            no_cache: true,
1294            no_catalog: true,
1295            format: None,
1296            config_dir: Some(tmp.path().to_path_buf()),
1297        };
1298        let result = run(&c, mock(&[])).await.unwrap();
1299        assert!(
1300            result.has_errors(),
1301            "expected format error without override"
1302        );
1303    }
1304
1305    #[tokio::test]
1306    async fn format_errors_suppressed_with_override() {
1307        let tmp = tempfile::tempdir().unwrap();
1308        let schema_path = tmp.path().join("schema.json");
1309        fs::write(&schema_path, FORMAT_SCHEMA).unwrap();
1310
1311        let f = tmp.path().join("data.json");
1312        fs::write(
1313            &f,
1314            format!(
1315                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1316                schema_path.to_string_lossy()
1317            ),
1318        )
1319        .unwrap();
1320
1321        // Use **/data.json to match the absolute path from the tempdir.
1322        fs::write(
1323            tmp.path().join("lintel.toml"),
1324            r#"
1325[[override]]
1326files = ["**/data.json"]
1327validate_formats = false
1328"#,
1329        )
1330        .unwrap();
1331
1332        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1333        let c = ValidateArgs {
1334            globs: vec![pattern],
1335            exclude: vec![],
1336            cache_dir: None,
1337            no_cache: true,
1338            no_catalog: true,
1339            format: None,
1340            config_dir: Some(tmp.path().to_path_buf()),
1341        };
1342        let result = run(&c, mock(&[])).await.unwrap();
1343        assert!(
1344            !result.has_errors(),
1345            "expected no errors with validate_formats = false override"
1346        );
1347    }
1348}