1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use lintel_diagnostics::reporter::{CheckResult, CheckedFile};
11use lintel_diagnostics::{DEFAULT_LABEL, LintelDiagnostic, find_instance_path_span, format_label};
12use lintel_schema_cache::{CacheStatus, SchemaCache};
13use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
14use schema_catalog::{CompiledCatalog, FileFormat};
15
16use crate::catalog;
17use crate::discover;
18use crate::parsers::{self, Parser};
19use crate::registry;
20
/// Maximum number of files read concurrently by `read_files`; enforced via a
/// semaphore so large trees don't exhaust the process's file descriptors.
const FD_CONCURRENCY_LIMIT: usize = 128;

/// Schema retriever that resolves `file://` URIs from the local filesystem
/// and delegates every other URI to the HTTP-backed `SchemaCache`.
struct LocalRetriever {
    /// Fallback retriever for non-`file://` URIs.
    http: SchemaCache,
}
31
32#[async_trait::async_trait]
33impl jsonschema::AsyncRetrieve for LocalRetriever {
34 async fn retrieve(
35 &self,
36 uri: &jsonschema::Uri<String>,
37 ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
38 let s = uri.as_str();
39 if let Some(raw) = s.strip_prefix("file://") {
40 let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
41 let content = tokio::fs::read_to_string(path.as_ref()).await?;
42 Ok(serde_json::from_str(&content)?)
43 } else {
44 self.http.retrieve(uri).await
45 }
46 }
47}
48
/// Options controlling a validation run.
pub struct ValidateArgs {
    /// Glob patterns or directories selecting files to validate; empty means
    /// "discover everything under the current directory".
    pub globs: Vec<String>,

    /// Glob patterns for files to skip.
    pub exclude: Vec<String>,

    /// Override for the on-disk schema cache directory.
    pub cache_dir: Option<String>,

    /// Re-fetch remote schemas even when the cache has a fresh copy.
    pub force_schema_fetch: bool,

    /// Re-validate files even on a validation-cache hit.
    pub force_validation: bool,

    /// Skip fetching schema catalogs/registries entirely.
    pub no_catalog: bool,

    /// Directory to start the config-file search from (defaults to cwd).
    pub config_dir: Option<PathBuf>,

    /// Time-to-live override for cached schemas.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
74
/// A successfully parsed instance document, queued for validation.
struct ParsedFile {
    /// Display path; for JSONL lines this is `"file:line"`.
    path: String,
    /// Raw source text, used for span lookup and validation-cache keys.
    content: String,
    /// Parsed JSON value to validate against the schema.
    instance: Value,
    /// Schema URI as originally written/matched, before rewrites are applied.
    original_schema_uri: String,
}
87
88#[tracing::instrument(skip_all)]
96pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
97 let start_dir = match search_dir {
98 Some(d) => d.to_path_buf(),
99 None => match std::env::current_dir() {
100 Ok(d) => d,
101 Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
102 },
103 };
104
105 let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
106 return (lintel_config::Config::default(), start_dir, None);
107 };
108
109 let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
110 let cfg = lintel_config::find_and_load(&start_dir)
111 .ok()
112 .flatten()
113 .unwrap_or_default();
114 (cfg, dir, Some(config_path))
115}
116
117#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
127pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
128 if globs.is_empty() {
129 return discover::discover_files(".", exclude);
130 }
131
132 let mut result = Vec::new();
133 for pattern in globs {
134 let path = Path::new(pattern);
135 if path.is_dir() {
136 result.extend(discover::discover_files(pattern, exclude)?);
137 } else {
138 for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
139 let path = entry?;
140 if path.is_file() && !is_excluded(&path, exclude) {
141 result.push(path);
142 }
143 }
144 }
145 }
146 Ok(result)
147}
148
149fn is_excluded(path: &Path, excludes: &[String]) -> bool {
150 let path_str = match path.to_str() {
151 Some(s) => s.strip_prefix("./").unwrap_or(s),
152 None => return false,
153 };
154 excludes
155 .iter()
156 .any(|pattern| glob_match::glob_match(pattern, path_str))
157}
158
159pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
168 use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
169 const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
170
171 for fmt in FORMATS {
172 let parser = parsers::parser_for(fmt);
173 if let Ok(val) = parser.parse(content, file_name) {
174 return Some((fmt, val));
175 }
176 }
177 None
178}
179
/// Outcome of processing one file (or one JSONL line).
enum FileResult {
    /// Parsed successfully and matched to a schema.
    Parsed {
        /// Fully resolved schema URI (after rewrites / path resolution).
        schema_uri: String,
        parsed: ParsedFile,
    },
    /// A diagnostic to report (parse failure, schema mismatch, ...).
    Error(LintelDiagnostic),
    /// Nothing to validate: no schema applies or the content isn't parseable.
    Skip,
}
190
/// Resolve a schema reference to something the retriever can load.
///
/// Remote (`http://`/`https://`) and explicit `file://` URIs are returned
/// unchanged — `LocalRetriever` handles `file://` directly, and joining such
/// a URI onto a directory would mangle it into a bogus relative path (the
/// previous behavior). Anything else is treated as a filesystem path and,
/// when `base_dir` is given, resolved relative to it; absolute paths are
/// unaffected because `Path::join` replaces the base with an absolute
/// component.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    if schema_uri.starts_with("http://")
        || schema_uri.starts_with("https://")
        || schema_uri.starts_with("file://")
    {
        return schema_uri.to_string();
    }
    match base_dir {
        Some(dir) => dir.join(schema_uri).to_string_lossy().to_string(),
        None => schema_uri.to_string(),
    }
}
206
#[allow(clippy::too_many_arguments)]
/// Parse one file and match it to a schema.
///
/// Schema resolution order: inline `$schema` in the document, then the config's
/// schema mappings, then the compiled catalogs. Returns one `FileResult` per
/// document (JSONL files may yield several, one per line).
fn process_one_file(
    path: &Path,
    content: String,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let path_str = path.display().to_string();
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&path_str);

    let detected_format = parsers::detect_format(path);

    // JSONL is validated line-by-line; delegate to the specialised path.
    if detected_format == Some(FileFormat::Jsonl) {
        return process_jsonl_file(
            path,
            &path_str,
            file_name,
            &content,
            config,
            config_dir,
            compiled_catalogs,
        );
    }

    // Unknown extension: only bother parsing if some mapping/catalog claims
    // this file, otherwise skip early.
    if detected_format.is_none() {
        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
            || compiled_catalogs
                .iter()
                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
        if !has_match {
            return vec![FileResult::Skip];
        }
    }

    // Known format: use its parser and surface parse errors. Unknown format:
    // try every parser and skip silently if none succeeds.
    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
        let parser = parsers::parser_for(fmt);
        match parser.parse(&content, &path_str) {
            Ok(val) => (parser, val),
            Err(parse_err) => return vec![FileResult::Error(parse_err)],
        }
    } else {
        match try_parse_all(&content, &path_str) {
            Some((fmt, val)) => (parsers::parser_for(fmt), val),
            None => return vec![FileResult::Skip],
        }
    };

    // An empty/null document has nothing to validate.
    if instance.is_null() {
        return vec![FileResult::Skip];
    }

    let inline_uri = parser.extract_schema_uri(&content, &instance);
    let from_inline = inline_uri.is_some();
    let schema_uri = inline_uri
        .or_else(|| {
            config
                .find_schema_mapping(&path_str, file_name)
                .map(str::to_string)
        })
        .or_else(|| {
            compiled_catalogs
                .iter()
                .find_map(|cat| cat.find_schema(&path_str, file_name))
                .map(str::to_string)
        });

    let Some(schema_uri) = schema_uri else {
        return vec![FileResult::Skip];
    };

    // Keep the pre-rewrite URI: format-validation opt-outs match against it.
    let original_schema_uri = schema_uri.clone();

    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

    // Inline references are relative to the file; config/catalog references
    // are relative to the config directory.
    let schema_uri = resolve_local_schema_path(
        &schema_uri,
        if from_inline {
            path.parent()
        } else {
            Some(config_dir)
        },
    );

    vec![FileResult::Parsed {
        schema_uri,
        parsed: ParsedFile {
            path: path_str,
            content,
            instance,
            original_schema_uri,
        },
    }]
}
324
#[allow(clippy::too_many_arguments)]
/// Parse a JSONL file and match each line to a schema.
///
/// Every line becomes its own `ParsedFile` (path suffixed with `:line`).
/// Lines whose `$schema` disagrees with the rest of the file are reported as
/// `SchemaMismatch` diagnostics, but all lines are still validated.
fn process_jsonl_file(
    path: &Path,
    path_str: &str,
    file_name: &str,
    content: &str,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
        Ok(lines) => lines,
        Err(parse_err) => return vec![FileResult::Error(parse_err)],
    };

    if lines.is_empty() {
        return vec![FileResult::Skip];
    }

    let mut results = Vec::with_capacity(lines.len());

    // Flag inconsistent inline $schema declarations up front.
    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
        for m in mismatches {
            results.push(FileResult::Error(LintelDiagnostic::SchemaMismatch {
                path: path_str.to_string(),
                line_number: m.line_number,
                message: format!("expected consistent $schema but found {}", m.schema_uri),
            }));
        }
    }

    for line in lines {
        // Same resolution order as process_one_file: inline, config, catalogs.
        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
        let from_inline = inline_uri.is_some();
        let schema_uri = inline_uri
            .or_else(|| {
                config
                    .find_schema_mapping(path_str, file_name)
                    .map(str::to_string)
            })
            .or_else(|| {
                compiled_catalogs
                    .iter()
                    .find_map(|cat| cat.find_schema(path_str, file_name))
                    .map(str::to_string)
            });

        let Some(schema_uri) = schema_uri else {
            continue;
        };

        // Keep the pre-rewrite URI for format-validation opt-out matching.
        let original_schema_uri = schema_uri.clone();

        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

        // Inline references resolve relative to the file, others to the
        // config directory.
        let schema_uri = resolve_local_schema_path(
            &schema_uri,
            if from_inline {
                path.parent()
            } else {
                Some(config_dir)
            },
        );

        let line_path = format!("{path_str}:{}", line.line_number);

        results.push(FileResult::Parsed {
            schema_uri,
            parsed: ParsedFile {
                path: line_path,
                content: line.raw,
                instance: line.value,
                original_schema_uri,
            },
        });
    }

    // Mismatch diagnostics alone still count as results; only a fully
    // unmatched file is a Skip.
    if results.is_empty() {
        vec![FileResult::Skip]
    } else {
        results
    }
}
420
421#[tracing::instrument(skip_all, fields(file_count = files.len()))]
428pub async fn read_files(
429 files: &[PathBuf],
430 errors: &mut Vec<LintelDiagnostic>,
431) -> Vec<(PathBuf, String)> {
432 let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
433 let mut read_set = tokio::task::JoinSet::new();
434 for path in files {
435 let path = path.clone();
436 let sem = semaphore.clone();
437 read_set.spawn(async move {
438 let _permit = sem.acquire().await.expect("semaphore closed");
439 let result = tokio::fs::read_to_string(&path).await;
440 (path, result)
441 });
442 }
443
444 let mut file_contents = Vec::with_capacity(files.len());
445 while let Some(result) = read_set.join_next().await {
446 match result {
447 Ok((path, Ok(content))) => file_contents.push((path, content)),
448 Ok((path, Err(e))) => {
449 errors.push(LintelDiagnostic::Io {
450 path: path.display().to_string(),
451 message: format!("failed to read: {e}"),
452 });
453 }
454 Err(e) => tracing::warn!("file read task panicked: {e}"),
455 }
456 }
457
458 file_contents
459}
460
461#[tracing::instrument(skip_all, fields(file_count = file_contents.len()))]
464#[allow(clippy::too_many_arguments)]
465fn parse_and_group_contents(
466 file_contents: Vec<(PathBuf, String)>,
467 config: &lintel_config::Config,
468 config_dir: &Path,
469 compiled_catalogs: &[CompiledCatalog],
470 errors: &mut Vec<LintelDiagnostic>,
471) -> BTreeMap<String, Vec<ParsedFile>> {
472 let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
473 for (path, content) in file_contents {
474 let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
475 for result in results {
476 match result {
477 FileResult::Parsed { schema_uri, parsed } => {
478 schema_groups.entry(schema_uri).or_default().push(parsed);
479 }
480 FileResult::Error(e) => errors.push(e),
481 FileResult::Skip => {}
482 }
483 }
484 }
485
486 schema_groups
487}
488
#[allow(clippy::too_many_arguments)]
/// Obtain the schema document for a group of files.
///
/// Remote URIs must already be in `prefetched` (filled by the prefetch phase);
/// local paths are read from disk and memoised in `local_cache`. On any
/// failure the whole group is reported as `SchemaFetch` diagnostics and `None`
/// is returned. The second tuple element is the HTTP cache status (`None` for
/// local schemas).
async fn fetch_schema_from_prefetched(
    schema_uri: &str,
    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
    local_cache: &mut HashMap<String, Value>,
    group: &[ParsedFile],
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Option<(Value, Option<CacheStatus>)> {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");

    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
        match prefetched.get(schema_uri) {
            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
            // Should not happen: prefetch covers every remote group key.
            None => Err(format!("schema not prefetched: {schema_uri}")),
        }
    } else if let Some(cached) = local_cache.get(schema_uri) {
        Ok((cached.clone(), None))
    } else {
        // First use of this local schema: read, parse, and memoise it.
        tokio::fs::read_to_string(schema_uri)
            .await
            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
            .and_then(|content| {
                serde_json::from_str::<Value>(&content)
                    .map(|v| {
                        local_cache.insert(schema_uri.to_string(), v.clone());
                        (v, None)
                    })
                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
            })
    };

    match result {
        Ok(value) => Some(value),
        Err(message) => {
            // Every file in the group gets the same fetch diagnostic.
            report_group_error(
                |path| LintelDiagnostic::SchemaFetch {
                    path: path.to_string(),
                    message: message.clone(),
                },
                schema_uri,
                None,
                group,
                errors,
                checked,
                on_check,
            );
            None
        }
    }
}
550
551#[allow(clippy::too_many_arguments)]
553fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
554 make_error: impl Fn(&str) -> LintelDiagnostic,
555 schema_uri: &str,
556 cache_status: Option<CacheStatus>,
557 group: &[P],
558 errors: &mut Vec<LintelDiagnostic>,
559 checked: &mut Vec<CheckedFile>,
560 on_check: &mut impl FnMut(&CheckedFile),
561) {
562 for item in group {
563 let pf = item.borrow();
564 let cf = CheckedFile {
565 path: pf.path.clone(),
566 schema: schema_uri.to_string(),
567 cache_status,
568 validation_cache_status: None,
569 };
570 on_check(&cf);
571 checked.push(cf);
572 errors.push(make_error(&pf.path));
573 }
574}
575
576#[allow(clippy::too_many_arguments)]
578fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
579 schema_uri: &str,
580 cache_status: Option<CacheStatus>,
581 validation_cache_status: Option<ValidationCacheStatus>,
582 group: &[P],
583 checked: &mut Vec<CheckedFile>,
584 on_check: &mut impl FnMut(&CheckedFile),
585) {
586 for item in group {
587 let pf = item.borrow();
588 let cf = CheckedFile {
589 path: pf.path.clone(),
590 schema: schema_uri.to_string(),
591 cache_status,
592 validation_cache_status,
593 };
594 on_check(&cf);
595 checked.push(cf);
596 }
597}
598
/// Shorten jsonschema's anyOf/oneOf error messages.
///
/// Those messages start with the full serialized instance value, which can be
/// huge; when the marker is present, everything before it is dropped and the
/// message is kept from "not valid ..." onward (`pos + 4` skips the leading
/// `" is "` of the marker). Other messages pass through unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    match msg.find(MARKER) {
        Some(pos) => msg[pos + 4..].to_string(),
        None => msg,
    }
}
615
/// Convert raw validation errors into rich `Validation` diagnostics with
/// source spans resolved against the file's text.
fn push_validation_errors(
    pf: &ParsedFile,
    schema_url: &str,
    validation_errors: &[ValidationError],
    errors: &mut Vec<LintelDiagnostic>,
) {
    for ve in validation_errors {
        // Map the JSON-pointer-ish instance path back to a byte span in the
        // original text so the reporter can underline it.
        let span = find_instance_path_span(&pf.content, &ve.instance_path);
        // An empty instance path means the error applies to the whole document.
        let instance_path = if ve.instance_path.is_empty() {
            DEFAULT_LABEL.to_string()
        } else {
            ve.instance_path.clone()
        };
        let label = format_label(&instance_path, &ve.schema_path);
        let source_span: miette::SourceSpan = span.into();
        errors.push(LintelDiagnostic::Validation {
            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
            span: source_span,
            // Same span for both labels: we only resolve the instance side.
            schema_span: source_span,
            path: pf.path.clone(),
            instance_path,
            label,
            message: ve.message.clone(),
            schema_url: schema_url.to_string(),
            schema_path: ve.schema_path.clone(),
        });
    }
}
645
#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
#[allow(clippy::too_many_arguments)]
/// Validate every file in the group against the compiled schema.
///
/// For each file the resulting error list (possibly empty) is stored in the
/// validation cache *before* diagnostics are emitted, so the next run can hit
/// the cache even when this run reported errors. All files here were cache
/// misses, hence the hard-coded `ValidationCacheStatus::Miss`.
async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
    validator: &jsonschema::Validator,
    schema_uri: &str,
    schema_hash: &str,
    validate_formats: bool,
    cache_status: Option<CacheStatus>,
    group: &[P],
    vcache: &lintel_validation_cache::ValidationCache,
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) {
    for item in group {
        let pf = item.borrow();
        // Collect all errors (not just the first) for complete reporting.
        let file_errors: Vec<ValidationError> = validator
            .iter_errors(&pf.instance)
            .map(|error| ValidationError {
                instance_path: error.instance_path().to_string(),
                message: clean_error_message(error.to_string()),
                schema_path: error.schema_path().to_string(),
            })
            .collect();

        // Cache key includes the format-validation flag: the same file/schema
        // pair can produce different errors depending on it.
        vcache
            .store(
                &lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash,
                    validate_formats,
                },
                &file_errors,
            )
            .await;
        push_validation_errors(pf, schema_uri, &file_errors, errors);

        let cf = CheckedFile {
            path: pf.path.clone(),
            schema: schema_uri.to_string(),
            cache_status,
            validation_cache_status: Some(ValidationCacheStatus::Miss),
        };
        on_check(&cf);
        checked.push(cf);
    }
}
695
/// Fetch and compile all schema catalogs concurrently.
///
/// Sources, in precedence order: each registry from the config (by position),
/// then the default lintel registry (unless disabled), then the SchemaStore
/// catalog. Tasks complete in arbitrary order, so each carries an ordering
/// index and results are sorted afterwards to restore precedence. Failures
/// are non-fatal: a warning is printed and the catalog is dropped.
pub async fn fetch_compiled_catalogs(
    retriever: &SchemaCache,
    config: &lintel_config::Config,
    no_catalog: bool,
) -> Vec<CompiledCatalog> {
    let mut compiled_catalogs = Vec::new();

    if !no_catalog {
        let catalog_span = tracing::info_span!("fetch_catalogs").entered();

        // (ordering index, human-readable label, fetch+compile result)
        #[allow(clippy::items_after_statements)]
        type CatalogResult = (
            usize, String,
            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
        );
        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();

        // User-configured registries keep their config-file order.
        for (i, registry_url) in config.registries.iter().enumerate() {
            let r = retriever.clone();
            let url = registry_url.clone();
            let label = format!("registry {url}");
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, &url)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (i, label, result)
            });
        }

        // Default registry sorts after all configured ones.
        let lintel_order = config.registries.len();
        if !config.no_default_catalog {
            let r = retriever.clone();
            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (lintel_order, label, result)
            });
        }

        // SchemaStore always sorts last (index is stable whether or not the
        // default catalog was spawned).
        let schemastore_order = config.registries.len() + 1;
        let r = retriever.clone();
        catalog_tasks.spawn(async move {
            let result = catalog::fetch_catalog(&r)
                .await
                .map(|cat| CompiledCatalog::compile(&cat));
            (schemastore_order, "SchemaStore catalog".to_string(), result)
        });

        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
        while let Some(result) = catalog_tasks.join_next().await {
            match result {
                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
            }
        }
        // Restore precedence order lost to concurrent completion.
        results.sort_by_key(|(order, _)| *order);
        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));

        drop(catalog_span);
    }

    compiled_catalogs
}
776
/// Run validation with a default schema cache and no per-file callback.
///
/// Convenience wrapper around [`run_with`].
pub async fn run(args: &ValidateArgs) -> Result<CheckResult> {
    run_with(args, None, |_| {}).await
}
783
#[tracing::instrument(skip_all, name = "validate")]
/// Full validation pipeline: load config, collect and read files, fetch
/// catalogs, then validate.
///
/// `cache` overrides the schema cache (used by tests to inject mocks);
/// `on_check` is invoked once per checked file as results arrive.
pub async fn run_with(
    args: &ValidateArgs,
    cache: Option<SchemaCache>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let retriever = build_retriever(args, cache);
    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    let files = collect_files(&args.globs, &args.exclude)?;
    tracing::info!(file_count = files.len(), "collected files");

    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;

    // Read errors are accumulated here and merged with validation errors.
    let mut errors: Vec<LintelDiagnostic> = Vec::new();
    let file_contents = read_files(&files, &mut errors).await;

    run_with_contents_inner(
        file_contents,
        args,
        retriever,
        config,
        &config_dir,
        compiled_catalogs,
        errors,
        &mut on_check,
    )
    .await
}
818
/// Like [`run_with`], but validates caller-supplied `(path, content)` pairs
/// instead of discovering and reading files from disk (e.g. for editor
/// buffers or stdin input).
pub async fn run_with_contents(
    args: &ValidateArgs,
    file_contents: Vec<(PathBuf, String)>,
    cache: Option<SchemaCache>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let retriever = build_retriever(args, cache);
    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
    // No disk reads happen here, so there are no pre-existing I/O errors.
    let errors: Vec<LintelDiagnostic> = Vec::new();

    run_with_contents_inner(
        file_contents,
        args,
        retriever,
        config,
        &config_dir,
        compiled_catalogs,
        errors,
        &mut on_check,
    )
    .await
}
849
850fn build_retriever(args: &ValidateArgs, cache: Option<SchemaCache>) -> SchemaCache {
851 if let Some(c) = cache {
852 return c;
853 }
854 let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
855 if let Some(dir) = &args.cache_dir {
856 let path = PathBuf::from(dir);
857 let _ = fs::create_dir_all(&path);
858 builder = builder.cache_dir(path);
859 }
860 if let Some(ttl) = args.schema_cache_ttl {
861 builder = builder.ttl(ttl);
862 }
863 builder.build()
864}
865
#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
/// Core validation loop shared by [`run_with`] and [`run_with_contents`].
///
/// Phases: group files by schema → prefetch all remote schemas concurrently →
/// per schema group: fetch/parse the schema, consult the validation cache,
/// compile a validator for the misses, validate. Per-phase wall time is
/// accumulated and logged at the end, and errors are sorted by (path, offset)
/// for stable output.
async fn run_with_contents_inner(
    file_contents: Vec<(PathBuf, String)>,
    args: &ValidateArgs,
    retriever: SchemaCache,
    config: lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: Vec<CompiledCatalog>,
    mut errors: Vec<LintelDiagnostic>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let mut checked: Vec<CheckedFile> = Vec::new();

    let schema_groups = parse_and_group_contents(
        file_contents,
        &config,
        config_dir,
        &compiled_catalogs,
        &mut errors,
    );
    tracing::info!(
        schema_count = schema_groups.len(),
        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
        "grouped files by schema"
    );

    let vcache = lintel_validation_cache::ValidationCache::new(
        lintel_validation_cache::ensure_cache_dir(),
        args.force_validation,
    );

    // Prefetch every remote schema once, concurrently, before the per-group
    // loop; local schemas are read lazily inside the loop.
    let remote_uris: Vec<&String> = schema_groups
        .keys()
        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
        .collect();

    let prefetched = {
        let _prefetch_span =
            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();

        let mut schema_tasks = tokio::task::JoinSet::new();
        for uri in remote_uris {
            let r = retriever.clone();
            let u = uri.clone();
            schema_tasks.spawn(async move {
                let result = r.fetch(&u).await;
                (u, result)
            });
        }

        // Fetch errors are kept per-URI so each group reports its own failure.
        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
        while let Some(result) = schema_tasks.join_next().await {
            match result {
                Ok((uri, fetch_result)) => {
                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
                }
                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
            }
        }

        prefetched
    };

    // Memoises local schema files across groups.
    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
    // Per-phase wall-time accumulators for the breakdown log below.
    let mut fetch_time = core::time::Duration::ZERO;
    let mut hash_time = core::time::Duration::ZERO;
    let mut vcache_time = core::time::Duration::ZERO;
    let mut compile_time = core::time::Duration::ZERO;
    let mut validate_time = core::time::Duration::ZERO;

    for (schema_uri, group) in &schema_groups {
        let _group_span = tracing::debug_span!(
            "schema_group",
            schema = schema_uri.as_str(),
            files = group.len(),
        )
        .entered();

        // Formats are validated only if no file in the group opts out
        // (checked against both the original and the resolved schema URI).
        let validate_formats = group.iter().all(|pf| {
            config
                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
        });

        let t = std::time::Instant::now();
        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
            schema_uri,
            &prefetched,
            &mut local_schema_cache,
            group,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await
        else {
            // Fetch failed: diagnostics already emitted for the whole group.
            fetch_time += t.elapsed();
            continue;
        };
        fetch_time += t.elapsed();

        let t = std::time::Instant::now();
        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
        hash_time += t.elapsed();

        // Files not satisfied by the validation cache; only these get the
        // (expensive) compile + validate treatment.
        let mut cache_misses: Vec<&ParsedFile> = Vec::new();

        let t = std::time::Instant::now();
        for pf in group {
            let (cached, vcache_status) = vcache
                .lookup(&lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash: &schema_hash,
                    validate_formats,
                })
                .await;

            if let Some(cached_errors) = cached {
                // Cache hit: replay the stored errors without re-validating.
                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
                let cf = CheckedFile {
                    path: pf.path.clone(),
                    schema: schema_uri.clone(),
                    cache_status,
                    validation_cache_status: Some(vcache_status),
                };
                on_check(&cf);
                checked.push(cf);
            } else {
                cache_misses.push(pf);
            }
        }
        vcache_time += t.elapsed();

        tracing::debug!(
            cache_hits = group.len() - cache_misses.len(),
            cache_misses = cache_misses.len(),
            "validation cache"
        );

        if cache_misses.is_empty() {
            continue;
        }

        let t = std::time::Instant::now();
        let validator = {
            let is_remote_schema =
                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
            let local_retriever = LocalRetriever {
                http: retriever.clone(),
            };
            let opts = jsonschema::async_options()
                .with_retriever(local_retriever)
                .should_validate_formats(validate_formats);
            // Base URI lets the validator resolve relative $refs: remote
            // schemas use the URI minus any fragment; local schemas use a
            // canonical file:// URI (handled by LocalRetriever above).
            let base_uri = if is_remote_schema {
                let uri = match schema_uri.find('#') {
                    Some(pos) => schema_uri[..pos].to_string(),
                    None => schema_uri.clone(),
                };
                Some(uri)
            } else {
                std::fs::canonicalize(schema_uri)
                    .ok()
                    .map(|p| format!("file://{}", p.display()))
            };
            let opts = if let Some(uri) = base_uri {
                opts.with_base_uri(uri)
            } else {
                opts
            };
            match opts.build(&schema_value).await {
                Ok(v) => v,
                Err(e) => {
                    compile_time += t.elapsed();
                    // Some schemas only fail to compile because of strict
                    // "uri-reference" format handling; when format validation
                    // is off anyway, treat the group as checked rather than
                    // failing it.
                    if !validate_formats && e.to_string().contains("uri-reference") {
                        mark_group_checked(
                            schema_uri,
                            cache_status,
                            Some(ValidationCacheStatus::Miss),
                            &cache_misses,
                            &mut checked,
                            on_check,
                        );
                        continue;
                    }
                    let msg = format!("failed to compile schema: {e}");
                    report_group_error(
                        |path| LintelDiagnostic::SchemaCompile {
                            path: path.to_string(),
                            message: msg.clone(),
                        },
                        schema_uri,
                        cache_status,
                        &cache_misses,
                        &mut errors,
                        &mut checked,
                        on_check,
                    );
                    continue;
                }
            }
        };
        compile_time += t.elapsed();

        let t = std::time::Instant::now();
        validate_group(
            &validator,
            schema_uri,
            &schema_hash,
            validate_formats,
            cache_status,
            &cache_misses,
            &vcache,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await;
        validate_time += t.elapsed();
    }

    // Millisecond precision is plenty for a log line; truncation is fine.
    #[allow(clippy::cast_possible_truncation)]
    {
        tracing::info!(
            fetch_ms = fetch_time.as_millis() as u64,
            hash_ms = hash_time.as_millis() as u64,
            vcache_ms = vcache_time.as_millis() as u64,
            compile_ms = compile_time.as_millis() as u64,
            validate_ms = validate_time.as_millis() as u64,
            "phase2 breakdown"
        );
    }

    // Deterministic output order: by path, then by byte offset within a file.
    errors.sort_by(|a, b| {
        a.path()
            .cmp(b.path())
            .then_with(|| a.offset().cmp(&b.offset()))
    });

    Ok(CheckResult { errors, checked })
}
1125
1126#[cfg(test)]
1127mod tests {
1128 use super::*;
1129 use lintel_schema_cache::SchemaCache;
1130 use std::path::Path;
1131
    /// Build an in-memory schema cache preloaded with `(uri, json_body)` pairs.
    fn mock(entries: &[(&str, &str)]) -> SchemaCache {
        let cache = SchemaCache::memory();
        for (uri, body) in entries {
            cache.insert(
                uri,
                serde_json::from_str(body).expect("test mock: invalid JSON"),
            );
        }
        cache
    }
1142
    /// Path to this crate's `testdata` fixture directory.
    fn testdata() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
    }
1146
    /// Expand each fixture dir into glob patterns for every supported extension.
    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
        dirs.iter()
            .flat_map(|dir| {
                let base = testdata().join(dir);
                vec![
                    base.join("*.json").to_string_lossy().to_string(),
                    base.join("*.yaml").to_string_lossy().to_string(),
                    base.join("*.yml").to_string_lossy().to_string(),
                    base.join("*.json5").to_string_lossy().to_string(),
                    base.join("*.jsonc").to_string_lossy().to_string(),
                    base.join("*.toml").to_string_lossy().to_string(),
                ]
            })
            .collect()
    }
1163
    /// Standard args for fixture dirs: both caches bypassed, catalogs disabled.
    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
        ValidateArgs {
            globs: scenario_globs(dirs),
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        }
    }
1176
    /// Minimal schema: object with a required string `name` property.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;

    /// Cache serving [`SCHEMA`] at the URI the fixture files reference.
    fn schema_mock() -> SchemaCache {
        mock(&[("https://example.com/schema.json", SCHEMA)])
    }
1183
    // A glob that matches nothing produces an empty, error-free result.
    #[tokio::test]
    async fn no_matching_files() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
        let c = ValidateArgs {
            globs: vec![pattern],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1204
    // All fixtures in positive_tests satisfy the schema.
    #[tokio::test]
    async fn dir_all_valid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1212
    // All fixtures in negative_tests violate the schema.
    #[tokio::test]
    async fn dir_all_invalid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["negative_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1220
    // Mixing valid and invalid fixtures still surfaces the invalid ones.
    #[tokio::test]
    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1228
    // Files with no schema association are skipped, not errored.
    #[tokio::test]
    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
        let c = args_for_dirs(&["no_schema"]);
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1236
    // Schema-less files alongside valid ones don't introduce errors.
    #[tokio::test]
    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests", "no_schema"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1244
    // Passing a bare directory (not a glob) triggers recursive discovery.
    #[tokio::test]
    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
        let dir = testdata().join("positive_tests");
        let c = ValidateArgs {
            globs: vec![dir.to_string_lossy().to_string()],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        assert!(result.files_checked() > 0);
        Ok(())
    }
1265
    // Several directory arguments are each discovered independently.
    #[tokio::test]
    async fn multiple_directory_args() -> anyhow::Result<()> {
        let pos_dir = testdata().join("positive_tests");
        let no_schema_dir = testdata().join("no_schema");
        let c = ValidateArgs {
            globs: vec![
                pos_dir.to_string_lossy().to_string(),
                no_schema_dir.to_string_lossy().to_string(),
            ],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1287
    // Directory arguments and glob patterns can be combined in one run.
    #[tokio::test]
    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
        let dir = testdata().join("positive_tests");
        let glob_pattern = testdata()
            .join("no_schema")
            .join("*.json")
            .to_string_lossy()
            .to_string();
        let c = ValidateArgs {
            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1310
1311 #[tokio::test]
1312 async fn malformed_json_parse_error() -> anyhow::Result<()> {
1313 let base = testdata().join("malformed");
1314 let c = ValidateArgs {
1315 globs: vec![base.join("*.json").to_string_lossy().to_string()],
1316 exclude: vec![],
1317 cache_dir: None,
1318 force_schema_fetch: true,
1319 force_validation: true,
1320 no_catalog: true,
1321 config_dir: None,
1322 schema_cache_ttl: None,
1323 };
1324 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1325 assert!(result.has_errors());
1326 Ok(())
1327 }
1328
1329 #[tokio::test]
1330 async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1331 let base = testdata().join("malformed");
1332 let c = ValidateArgs {
1333 globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1334 exclude: vec![],
1335 cache_dir: None,
1336 force_schema_fetch: true,
1337 force_validation: true,
1338 no_catalog: true,
1339 config_dir: None,
1340 schema_cache_ttl: None,
1341 };
1342 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1343 assert!(result.has_errors());
1344 Ok(())
1345 }
1346
1347 #[tokio::test]
1350 async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1351 let base = testdata().join("negative_tests");
1352 let c = ValidateArgs {
1353 globs: scenario_globs(&["positive_tests", "negative_tests"]),
1354 exclude: vec![
1355 base.join("missing_name.json").to_string_lossy().to_string(),
1356 base.join("missing_name.toml").to_string_lossy().to_string(),
1357 base.join("missing_name.yaml").to_string_lossy().to_string(),
1358 ],
1359 cache_dir: None,
1360 force_schema_fetch: true,
1361 force_validation: true,
1362 no_catalog: true,
1363 config_dir: None,
1364 schema_cache_ttl: None,
1365 };
1366 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1367 assert!(!result.has_errors());
1368 Ok(())
1369 }
1370
1371 #[tokio::test]
1374 async fn custom_cache_dir() -> anyhow::Result<()> {
1375 let c = ValidateArgs {
1376 globs: scenario_globs(&["positive_tests"]),
1377 exclude: vec![],
1378 cache_dir: None,
1379 force_schema_fetch: true,
1380 force_validation: true,
1381 no_catalog: true,
1382 config_dir: None,
1383 schema_cache_ttl: None,
1384 };
1385 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1386 assert!(!result.has_errors());
1387 Ok(())
1388 }
1389
1390 #[tokio::test]
1393 async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1394 let tmp = tempfile::tempdir()?;
1395 let schema_path = tmp.path().join("schema.json");
1396 fs::write(&schema_path, SCHEMA)?;
1397
1398 let f = tmp.path().join("valid.json");
1399 fs::write(
1400 &f,
1401 format!(
1402 r#"{{"$schema":"{}","name":"hello"}}"#,
1403 schema_path.to_string_lossy()
1404 ),
1405 )?;
1406
1407 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1408 let c = ValidateArgs {
1409 globs: vec![pattern],
1410 exclude: vec![],
1411 cache_dir: None,
1412 force_schema_fetch: true,
1413 force_validation: true,
1414 no_catalog: true,
1415 config_dir: None,
1416 schema_cache_ttl: None,
1417 };
1418 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1419 assert!(!result.has_errors());
1420 Ok(())
1421 }
1422
1423 #[tokio::test]
1424 async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1425 let tmp = tempfile::tempdir()?;
1426 let schema_path = tmp.path().join("schema.json");
1427 fs::write(&schema_path, SCHEMA)?;
1428
1429 let f = tmp.path().join("valid.yaml");
1430 fs::write(
1431 &f,
1432 format!(
1433 "# yaml-language-server: $schema={}\nname: hello\n",
1434 schema_path.to_string_lossy()
1435 ),
1436 )?;
1437
1438 let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1439 let c = ValidateArgs {
1440 globs: vec![pattern],
1441 exclude: vec![],
1442 cache_dir: None,
1443 force_schema_fetch: true,
1444 force_validation: true,
1445 no_catalog: true,
1446 config_dir: None,
1447 schema_cache_ttl: None,
1448 };
1449 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1450 assert!(!result.has_errors());
1451 Ok(())
1452 }
1453
1454 #[tokio::test]
1455 async fn missing_local_schema_errors() -> anyhow::Result<()> {
1456 let tmp = tempfile::tempdir()?;
1457 let f = tmp.path().join("ref.json");
1458 fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1459
1460 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1461 let c = ValidateArgs {
1462 globs: vec![pattern],
1463 exclude: vec![],
1464 cache_dir: None,
1465 force_schema_fetch: true,
1466 force_validation: true,
1467 no_catalog: true,
1468 config_dir: None,
1469 schema_cache_ttl: None,
1470 };
1471 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1472 assert!(result.has_errors());
1473 Ok(())
1474 }
1475
1476 #[tokio::test]
1479 async fn json5_valid_with_schema() -> anyhow::Result<()> {
1480 let tmp = tempfile::tempdir()?;
1481 let schema_path = tmp.path().join("schema.json");
1482 fs::write(&schema_path, SCHEMA)?;
1483
1484 let f = tmp.path().join("config.json5");
1485 fs::write(
1486 &f,
1487 format!(
1488 r#"{{
1489 // JSON5 comment
1490 "$schema": "{}",
1491 name: "hello",
1492}}"#,
1493 schema_path.to_string_lossy()
1494 ),
1495 )?;
1496
1497 let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1498 let c = ValidateArgs {
1499 globs: vec![pattern],
1500 exclude: vec![],
1501 cache_dir: None,
1502 force_schema_fetch: true,
1503 force_validation: true,
1504 no_catalog: true,
1505 config_dir: None,
1506 schema_cache_ttl: None,
1507 };
1508 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1509 assert!(!result.has_errors());
1510 Ok(())
1511 }
1512
1513 #[tokio::test]
1514 async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1515 let tmp = tempfile::tempdir()?;
1516 let schema_path = tmp.path().join("schema.json");
1517 fs::write(&schema_path, SCHEMA)?;
1518
1519 let f = tmp.path().join("config.jsonc");
1520 fs::write(
1521 &f,
1522 format!(
1523 r#"{{
1524 /* JSONC comment */
1525 "$schema": "{}",
1526 "name": "hello"
1527}}"#,
1528 schema_path.to_string_lossy()
1529 ),
1530 )?;
1531
1532 let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1533 let c = ValidateArgs {
1534 globs: vec![pattern],
1535 exclude: vec![],
1536 cache_dir: None,
1537 force_schema_fetch: true,
1538 force_validation: true,
1539 no_catalog: true,
1540 config_dir: None,
1541 schema_cache_ttl: None,
1542 };
1543 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1544 assert!(!result.has_errors());
1545 Ok(())
1546 }
1547
    /// Minimal stand-in for the GitHub Actions workflow schema used by the
    /// catalog tests: requires `on` and `jobs`, constrains `name` to a string.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1559
1560 fn gh_catalog_json() -> String {
1561 r#"{"version":1,"schemas":[{
1562 "name": "GitHub Workflow",
1563 "description": "GitHub Actions workflow",
1564 "url": "https://www.schemastore.org/github-workflow.json",
1565 "fileMatch": [
1566 "**/.github/workflows/*.yml",
1567 "**/.github/workflows/*.yaml"
1568 ]
1569 }]}"#
1570 .to_string()
1571 }
1572
1573 #[tokio::test]
1574 async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1575 let tmp = tempfile::tempdir()?;
1576 let cache_tmp = tempfile::tempdir()?;
1577 let wf_dir = tmp.path().join(".github/workflows");
1578 fs::create_dir_all(&wf_dir)?;
1579 fs::write(
1580 wf_dir.join("ci.yml"),
1581 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1582 )?;
1583
1584 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1585 let client = mock(&[
1586 (
1587 "https://www.schemastore.org/api/json/catalog.json",
1588 &gh_catalog_json(),
1589 ),
1590 (
1591 "https://www.schemastore.org/github-workflow.json",
1592 GH_WORKFLOW_SCHEMA,
1593 ),
1594 ]);
1595 let c = ValidateArgs {
1596 globs: vec![pattern],
1597 exclude: vec![],
1598 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1599 force_schema_fetch: true,
1600 force_validation: true,
1601 no_catalog: false,
1602 config_dir: None,
1603 schema_cache_ttl: None,
1604 };
1605 let result = run_with(&c, Some(client), |_| {}).await?;
1606 assert!(!result.has_errors());
1607 Ok(())
1608 }
1609
1610 #[tokio::test]
1611 async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1612 let tmp = tempfile::tempdir()?;
1613 let cache_tmp = tempfile::tempdir()?;
1614 let wf_dir = tmp.path().join(".github/workflows");
1615 fs::create_dir_all(&wf_dir)?;
1616 fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1617
1618 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1619 let client = mock(&[
1620 (
1621 "https://www.schemastore.org/api/json/catalog.json",
1622 &gh_catalog_json(),
1623 ),
1624 (
1625 "https://www.schemastore.org/github-workflow.json",
1626 GH_WORKFLOW_SCHEMA,
1627 ),
1628 ]);
1629 let c = ValidateArgs {
1630 globs: vec![pattern],
1631 exclude: vec![],
1632 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1633 force_schema_fetch: true,
1634 force_validation: true,
1635 no_catalog: false,
1636 config_dir: None,
1637 schema_cache_ttl: None,
1638 };
1639 let result = run_with(&c, Some(client), |_| {}).await?;
1640 assert!(result.has_errors());
1641 Ok(())
1642 }
1643
1644 #[tokio::test]
1645 async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1646 let tmp = tempfile::tempdir()?;
1647 let cache_tmp = tempfile::tempdir()?;
1648 let wf_dir = tmp.path().join(".github/workflows");
1649 fs::create_dir_all(&wf_dir)?;
1650 fs::write(
1651 wf_dir.join("ci.yml"),
1652 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1653 )?;
1654
1655 let client = mock(&[
1656 (
1657 "https://www.schemastore.org/api/json/catalog.json",
1658 &gh_catalog_json(),
1659 ),
1660 (
1661 "https://www.schemastore.org/github-workflow.json",
1662 GH_WORKFLOW_SCHEMA,
1663 ),
1664 ]);
1665 let c = ValidateArgs {
1666 globs: vec![],
1667 exclude: vec![],
1668 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1669 force_schema_fetch: true,
1670 force_validation: true,
1671 no_catalog: false,
1672 config_dir: None,
1673 schema_cache_ttl: None,
1674 };
1675
1676 let orig_dir = std::env::current_dir()?;
1677 std::env::set_current_dir(tmp.path())?;
1678 let result = run_with(&c, Some(client), |_| {}).await?;
1679 std::env::set_current_dir(orig_dir)?;
1680
1681 assert!(!result.has_errors());
1682 Ok(())
1683 }
1684
1685 #[tokio::test]
1688 async fn toml_valid_with_schema() -> anyhow::Result<()> {
1689 let tmp = tempfile::tempdir()?;
1690 let schema_path = tmp.path().join("schema.json");
1691 fs::write(&schema_path, SCHEMA)?;
1692
1693 let f = tmp.path().join("config.toml");
1694 fs::write(
1695 &f,
1696 format!(
1697 "# :schema {}\nname = \"hello\"\n",
1698 schema_path.to_string_lossy()
1699 ),
1700 )?;
1701
1702 let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1703 let c = ValidateArgs {
1704 globs: vec![pattern],
1705 exclude: vec![],
1706 cache_dir: None,
1707 force_schema_fetch: true,
1708 force_validation: true,
1709 no_catalog: true,
1710 config_dir: None,
1711 schema_cache_ttl: None,
1712 };
1713 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1714 assert!(!result.has_errors());
1715 Ok(())
1716 }
1717
1718 #[tokio::test]
1721 async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1722 let tmp = tempfile::tempdir()?;
1723
1724 let schemas_dir = tmp.path().join("schemas");
1725 fs::create_dir_all(&schemas_dir)?;
1726 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1727
1728 fs::write(
1729 tmp.path().join("lintel.toml"),
1730 r#"
1731[rewrite]
1732"http://localhost:9000/" = "//schemas/"
1733"#,
1734 )?;
1735
1736 let f = tmp.path().join("config.json");
1737 fs::write(
1738 &f,
1739 r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1740 )?;
1741
1742 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1743 let c = ValidateArgs {
1744 globs: vec![pattern],
1745 exclude: vec![],
1746 cache_dir: None,
1747 force_schema_fetch: true,
1748 force_validation: true,
1749 no_catalog: true,
1750 config_dir: Some(tmp.path().to_path_buf()),
1751 schema_cache_ttl: None,
1752 };
1753
1754 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1755 assert!(!result.has_errors());
1756 assert_eq!(result.files_checked(), 1);
1757 Ok(())
1758 }
1759
1760 #[tokio::test]
1761 async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1762 let tmp = tempfile::tempdir()?;
1763
1764 let schemas_dir = tmp.path().join("schemas");
1765 fs::create_dir_all(&schemas_dir)?;
1766 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1767
1768 fs::write(tmp.path().join("lintel.toml"), "")?;
1769
1770 let sub = tmp.path().join("deeply/nested");
1771 fs::create_dir_all(&sub)?;
1772 let f = sub.join("config.json");
1773 fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1774
1775 let pattern = sub.join("*.json").to_string_lossy().to_string();
1776 let c = ValidateArgs {
1777 globs: vec![pattern],
1778 exclude: vec![],
1779 cache_dir: None,
1780 force_schema_fetch: true,
1781 force_validation: true,
1782 no_catalog: true,
1783 config_dir: Some(tmp.path().to_path_buf()),
1784 schema_cache_ttl: None,
1785 };
1786
1787 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1788 assert!(!result.has_errors());
1789 Ok(())
1790 }
1791
    /// Schema with a `format: uri-reference` string property, used by the
    /// tests covering the `validate_formats` override behavior.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1800
1801 #[tokio::test]
1802 async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1803 let tmp = tempfile::tempdir()?;
1804 let schema_path = tmp.path().join("schema.json");
1805 fs::write(&schema_path, FORMAT_SCHEMA)?;
1806
1807 let f = tmp.path().join("data.json");
1808 fs::write(
1809 &f,
1810 format!(
1811 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1812 schema_path.to_string_lossy()
1813 ),
1814 )?;
1815
1816 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1817 let c = ValidateArgs {
1818 globs: vec![pattern],
1819 exclude: vec![],
1820 cache_dir: None,
1821 force_schema_fetch: true,
1822 force_validation: true,
1823 no_catalog: true,
1824 config_dir: Some(tmp.path().to_path_buf()),
1825 schema_cache_ttl: None,
1826 };
1827 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1828 assert!(
1829 result.has_errors(),
1830 "expected format error without override"
1831 );
1832 Ok(())
1833 }
1834
1835 #[tokio::test]
1836 async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1837 let tmp = tempfile::tempdir()?;
1838 let schema_path = tmp.path().join("schema.json");
1839 fs::write(&schema_path, FORMAT_SCHEMA)?;
1840
1841 let f = tmp.path().join("data.json");
1842 fs::write(
1843 &f,
1844 format!(
1845 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1846 schema_path.to_string_lossy()
1847 ),
1848 )?;
1849
1850 fs::write(
1852 tmp.path().join("lintel.toml"),
1853 r#"
1854[[override]]
1855files = ["**/data.json"]
1856validate_formats = false
1857"#,
1858 )?;
1859
1860 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1861 let c = ValidateArgs {
1862 globs: vec![pattern],
1863 exclude: vec![],
1864 cache_dir: None,
1865 force_schema_fetch: true,
1866 force_validation: true,
1867 no_catalog: true,
1868 config_dir: Some(tmp.path().to_path_buf()),
1869 schema_cache_ttl: None,
1870 };
1871 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1872 assert!(
1873 !result.has_errors(),
1874 "expected no errors with validate_formats = false override"
1875 );
1876 Ok(())
1877 }
1878
1879 #[tokio::test]
1882 async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1883 let tmp = tempfile::tempdir()?;
1884 fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1885
1886 let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1887 let c = ValidateArgs {
1888 globs: vec![pattern],
1889 exclude: vec![],
1890 cache_dir: None,
1891 force_schema_fetch: true,
1892 force_validation: true,
1893 no_catalog: true,
1894 config_dir: Some(tmp.path().to_path_buf()),
1895 schema_cache_ttl: None,
1896 };
1897 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1898 assert!(!result.has_errors());
1899 assert_eq!(result.files_checked(), 0);
1900 Ok(())
1901 }
1902
1903 #[tokio::test]
1904 async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1905 let tmp = tempfile::tempdir()?;
1906 let cache_tmp = tempfile::tempdir()?;
1907 fs::write(
1909 tmp.path().join("myapp.cfg"),
1910 r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1911 )?;
1912
1913 let catalog_json = r#"{"version":1,"schemas":[{
1914 "name": "MyApp Config",
1915 "description": "MyApp configuration",
1916 "url": "https://example.com/myapp.schema.json",
1917 "fileMatch": ["*.cfg"]
1918 }]}"#;
1919 let schema =
1920 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1921
1922 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1923 let client = mock(&[
1924 (
1925 "https://www.schemastore.org/api/json/catalog.json",
1926 catalog_json,
1927 ),
1928 ("https://example.com/myapp.schema.json", schema),
1929 ]);
1930 let c = ValidateArgs {
1931 globs: vec![pattern],
1932 exclude: vec![],
1933 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1934 force_schema_fetch: true,
1935 force_validation: true,
1936 no_catalog: false,
1937 config_dir: Some(tmp.path().to_path_buf()),
1938 schema_cache_ttl: None,
1939 };
1940 let result = run_with(&c, Some(client), |_| {}).await?;
1941 assert!(!result.has_errors());
1942 assert_eq!(result.files_checked(), 1);
1943 Ok(())
1944 }
1945
1946 #[tokio::test]
1947 async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1948 let tmp = tempfile::tempdir()?;
1949 let cache_tmp = tempfile::tempdir()?;
1950 fs::write(
1952 tmp.path().join("myapp.cfg"),
1953 "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1954 )?;
1955
1956 let catalog_json = r#"{"version":1,"schemas":[{
1957 "name": "MyApp Config",
1958 "description": "MyApp configuration",
1959 "url": "https://example.com/myapp.schema.json",
1960 "fileMatch": ["*.cfg"]
1961 }]}"#;
1962
1963 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1964 let client = mock(&[(
1965 "https://www.schemastore.org/api/json/catalog.json",
1966 catalog_json,
1967 )]);
1968 let c = ValidateArgs {
1969 globs: vec![pattern],
1970 exclude: vec![],
1971 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1972 force_schema_fetch: true,
1973 force_validation: true,
1974 no_catalog: false,
1975 config_dir: Some(tmp.path().to_path_buf()),
1976 schema_cache_ttl: None,
1977 };
1978 let result = run_with(&c, Some(client), |_| {}).await?;
1979 assert!(!result.has_errors());
1980 assert_eq!(result.files_checked(), 0);
1981 Ok(())
1982 }
1983
1984 #[tokio::test]
1985 async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1986 let tmp = tempfile::tempdir()?;
1987 let cache_tmp = tempfile::tempdir()?;
1988 fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1990
1991 let catalog_json = r#"{"version":1,"schemas":[{
1992 "name": "MyApp Config",
1993 "description": "MyApp configuration",
1994 "url": "https://example.com/myapp.schema.json",
1995 "fileMatch": ["*.cfg"]
1996 }]}"#;
1997 let schema =
1998 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1999
2000 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2001 let client = mock(&[
2002 (
2003 "https://www.schemastore.org/api/json/catalog.json",
2004 catalog_json,
2005 ),
2006 ("https://example.com/myapp.schema.json", schema),
2007 ]);
2008 let c = ValidateArgs {
2009 globs: vec![pattern],
2010 exclude: vec![],
2011 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2012 force_schema_fetch: true,
2013 force_validation: true,
2014 no_catalog: false,
2015 config_dir: Some(tmp.path().to_path_buf()),
2016 schema_cache_ttl: None,
2017 };
2018 let result = run_with(&c, Some(client), |_| {}).await?;
2019 assert!(result.has_errors());
2020 assert_eq!(result.files_checked(), 1);
2021 Ok(())
2022 }
2023
2024 #[tokio::test]
2027 async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
2028 let tmp = tempfile::tempdir()?;
2029 let schema_path = tmp.path().join("schema.json");
2030 fs::write(&schema_path, SCHEMA)?;
2031
2032 let f = tmp.path().join("valid.json");
2033 fs::write(
2034 &f,
2035 format!(
2036 r#"{{"$schema":"{}","name":"hello"}}"#,
2037 schema_path.to_string_lossy()
2038 ),
2039 )?;
2040
2041 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2042
2043 let c = ValidateArgs {
2045 globs: vec![pattern.clone()],
2046 exclude: vec![],
2047 cache_dir: None,
2048 force_schema_fetch: true,
2049 force_validation: false,
2050 no_catalog: true,
2051 config_dir: None,
2052 schema_cache_ttl: None,
2053 };
2054 let mut first_statuses = Vec::new();
2055 let result = run_with(&c, Some(mock(&[])), |cf| {
2056 first_statuses.push(cf.validation_cache_status);
2057 })
2058 .await?;
2059 assert!(!result.has_errors());
2060 assert!(result.files_checked() > 0);
2061
2062 assert!(
2064 first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2065 "expected at least one validation cache miss on first run"
2066 );
2067
2068 let mut second_statuses = Vec::new();
2070 let result = run_with(&c, Some(mock(&[])), |cf| {
2071 second_statuses.push(cf.validation_cache_status);
2072 })
2073 .await?;
2074 assert!(!result.has_errors());
2075
2076 assert!(
2078 second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2079 "expected at least one validation cache hit on second run"
2080 );
2081 Ok(())
2082 }
2083
2084 #[test]
2087 fn clean_strips_anyof_value() {
2088 let msg =
2089 r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2090 assert_eq!(
2091 clean_error_message(msg.to_string()),
2092 "not valid under any of the schemas listed in the 'anyOf' keyword"
2093 );
2094 }
2095
2096 #[test]
2097 fn clean_strips_oneof_value() {
2098 let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2099 assert_eq!(
2100 clean_error_message(msg.to_string()),
2101 "not valid under any of the schemas listed in the 'oneOf' keyword"
2102 );
2103 }
2104
2105 #[test]
2106 fn clean_strips_long_value() {
2107 let long_value = "x".repeat(5000);
2108 let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2109 let msg = format!("{long_value}{suffix}");
2110 assert_eq!(
2111 clean_error_message(msg),
2112 "not valid under any of the schemas listed in the 'anyOf' keyword"
2113 );
2114 }
2115
2116 #[test]
2117 fn clean_preserves_type_error() {
2118 let msg = r#"12345 is not of types "null", "string""#;
2119 assert_eq!(clean_error_message(msg.to_string()), msg);
2120 }
2121
2122 #[test]
2123 fn clean_preserves_required_property() {
2124 let msg = "\"name\" is a required property";
2125 assert_eq!(clean_error_message(msg.to_string()), msg);
2126 }
2127
2128 #[tokio::test]
2132 async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2133 let tmp = tempfile::tempdir()?;
2134
2135 let schema_body = r#"{
2137 "$schema": "http://json-schema.org/draft-07/schema#",
2138 "type": "object",
2139 "properties": { "name": { "type": "string" } },
2140 "required": ["name"]
2141 }"#;
2142
2143 let schema_url = "http://json-schema.org/draft-07/schema#";
2144
2145 let f = tmp.path().join("data.json");
2146 fs::write(
2147 &f,
2148 format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2149 )?;
2150
2151 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2152 let client = mock(&[(
2153 schema_url,
2155 schema_body,
2156 )]);
2157 let c = ValidateArgs {
2158 globs: vec![pattern],
2159 exclude: vec![],
2160 cache_dir: None,
2161 force_schema_fetch: true,
2162 force_validation: true,
2163 no_catalog: true,
2164 config_dir: None,
2165 schema_cache_ttl: None,
2166 };
2167 let result = run_with(&c, Some(client), |_| {}).await?;
2168 assert!(
2169 !result.has_errors(),
2170 "schema URI with fragment should not cause compilation error"
2171 );
2172 assert_eq!(result.files_checked(), 1);
2173 Ok(())
2174 }
2175
2176 #[tokio::test]
2177 async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2178 let tmp = tempfile::tempdir()?;
2179
2180 std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2182
2183 let schema_path = tmp.path().join("schema.json");
2185 std::fs::write(
2186 &schema_path,
2187 r#"{
2188 "type": "object",
2189 "properties": {
2190 "name": { "$ref": "./defs.json" }
2191 },
2192 "required": ["name"]
2193 }"#,
2194 )?;
2195
2196 let schema_uri = schema_path.to_string_lossy();
2198 std::fs::write(
2199 tmp.path().join("data.json"),
2200 format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2201 )?;
2202
2203 std::fs::write(
2205 tmp.path().join("bad.json"),
2206 format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2207 )?;
2208
2209 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2210 let args = ValidateArgs {
2211 globs: vec![pattern],
2212 exclude: vec![],
2213 cache_dir: None,
2214 force_schema_fetch: true,
2215 force_validation: true,
2216 no_catalog: true,
2217 config_dir: None,
2218 schema_cache_ttl: None,
2219 };
2220 let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2221
2222 assert!(result.has_errors());
2224 assert_eq!(result.errors.len(), 1);
2226 Ok(())
2227 }
2228}