1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog;
11use lintel_schema_cache::{CacheStatus, SchemaCache};
12use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
13use schema_catalog::{CompiledCatalog, FileFormat};
14
15use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
16use crate::discover;
17use crate::parsers::{self, Parser};
18use crate::registry;
19
/// Maximum number of files read concurrently; bounds the number of open
/// file descriptors during the async read phase (see `parse_and_group_files`).
const FD_CONCURRENCY_LIMIT: usize = 128;
24
/// Schema retriever that reads `file://` URIs from the local filesystem
/// and delegates every other URI to the HTTP-backed schema cache.
struct LocalRetriever {
    // Fallback used for non-`file://` (remote) URIs.
    http: SchemaCache,
}
30
31#[async_trait::async_trait]
32impl jsonschema::AsyncRetrieve for LocalRetriever {
33 async fn retrieve(
34 &self,
35 uri: &jsonschema::Uri<String>,
36 ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
37 let s = uri.as_str();
38 if let Some(raw) = s.strip_prefix("file://") {
39 let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
40 let content = tokio::fs::read_to_string(path.as_ref()).await?;
41 Ok(serde_json::from_str(&content)?)
42 } else {
43 self.http.retrieve(uri).await
44 }
45 }
46}
47
/// Options controlling a validation run.
pub struct ValidateArgs {
    /// Glob patterns or directories selecting files to validate;
    /// empty means "discover files from the current directory".
    pub globs: Vec<String>,

    /// Glob patterns for files to skip.
    pub exclude: Vec<String>,

    /// Override for the on-disk schema cache directory.
    pub cache_dir: Option<String>,

    /// Re-download remote schemas even when a cached copy exists.
    pub force_schema_fetch: bool,

    /// Re-validate files even when a cached validation result exists.
    pub force_validation: bool,

    /// Skip fetching schema catalogs entirely.
    pub no_catalog: bool,

    /// Directory the config-file search starts from (default: CWD).
    pub config_dir: Option<PathBuf>,

    /// Time-to-live applied to cached schemas.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
73
74pub use crate::diagnostics::LintError;
77
/// Record of a single file (or JSONL line) that was checked.
pub struct CheckedFile {
    /// File path; JSONL lines use the `path:line` form.
    pub path: String,
    /// URI of the schema the file was validated against.
    pub schema: String,
    /// How the remote schema was obtained; `None` for local schemas.
    pub cache_status: Option<CacheStatus>,
    /// Whether the validation result came from the validation cache.
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
87
/// Aggregate outcome of a validation run.
pub struct ValidateResult {
    /// All lint errors found, sorted by path and offset.
    pub errors: Vec<LintError>,
    /// Every file (or JSONL line) that was checked, in processing order.
    pub checked: Vec<CheckedFile>,
}
93
impl ValidateResult {
    /// True when at least one lint error was recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    /// Number of files (or JSONL lines) that were checked.
    pub fn files_checked(&self) -> usize {
        self.checked.len()
    }
}
103
/// A successfully parsed input paired with its original schema reference.
struct ParsedFile {
    // File path; JSONL lines carry a `path:line` suffix.
    path: String,
    // Raw text, kept for diagnostic span lookups and cache keys.
    content: String,
    // The document parsed into JSON.
    instance: Value,
    // Schema URI before rewrites/resolution; used for the
    // format-validation config lookup.
    original_schema_uri: String,
}
116
117#[tracing::instrument(skip_all)]
125pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
126 let start_dir = match search_dir {
127 Some(d) => d.to_path_buf(),
128 None => match std::env::current_dir() {
129 Ok(d) => d,
130 Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
131 },
132 };
133
134 let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
135 return (lintel_config::Config::default(), start_dir, None);
136 };
137
138 let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
139 let cfg = lintel_config::find_and_load(&start_dir)
140 .ok()
141 .flatten()
142 .unwrap_or_default();
143 (cfg, dir, Some(config_path))
144}
145
146#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
156pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
157 if globs.is_empty() {
158 return discover::discover_files(".", exclude);
159 }
160
161 let mut result = Vec::new();
162 for pattern in globs {
163 let path = Path::new(pattern);
164 if path.is_dir() {
165 result.extend(discover::discover_files(pattern, exclude)?);
166 } else {
167 for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
168 let path = entry?;
169 if path.is_file() && !is_excluded(&path, exclude) {
170 result.push(path);
171 }
172 }
173 }
174 }
175 Ok(result)
176}
177
178fn is_excluded(path: &Path, excludes: &[String]) -> bool {
179 let path_str = match path.to_str() {
180 Some(s) => s.strip_prefix("./").unwrap_or(s),
181 None => return false,
182 };
183 excludes
184 .iter()
185 .any(|pattern| glob_match::glob_match(pattern, path_str))
186}
187
188pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
197 use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
198 const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
199
200 for fmt in FORMATS {
201 let parser = parsers::parser_for(fmt);
202 if let Ok(val) = parser.parse(content, file_name) {
203 return Some((fmt, val));
204 }
205 }
206 None
207}
208
/// Outcome of processing one file (or one JSONL line).
enum FileResult {
    /// The file parsed and a schema was resolved for it.
    Parsed {
        schema_uri: String,
        parsed: ParsedFile,
    },
    /// A lint error (parse failure, schema mismatch, ...) to report.
    Error(LintError),
    /// Nothing to validate (no schema match, unparseable, or null doc).
    Skip,
}
219
/// Resolve a possibly-relative schema reference against `base_dir`.
///
/// Remote (`http`/`https`) URIs pass through untouched; local paths are
/// joined onto `base_dir` when one is available.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
    if is_remote {
        return schema_uri.to_string();
    }
    match base_dir {
        Some(dir) => dir.join(schema_uri).to_string_lossy().to_string(),
        None => schema_uri.to_string(),
    }
}
235
/// Parse one file and resolve which schema it should be validated against.
///
/// Returns one `FileResult` per validatable unit: a single entry for most
/// files, one per line for JSONL. Schema resolution precedence: inline
/// `$schema` in the file, then config mappings, then catalogs.
#[allow(clippy::too_many_arguments)]
fn process_one_file(
    path: &Path,
    content: String,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let path_str = path.display().to_string();
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&path_str);

    let detected_format = parsers::detect_format(path);

    // JSONL is handled separately: each line is its own instance.
    if detected_format == Some(FileFormat::Jsonl) {
        return process_jsonl_file(
            path,
            &path_str,
            file_name,
            &content,
            config,
            config_dir,
            compiled_catalogs,
        );
    }

    // Unknown extension: only attempt parsing when some config or
    // catalog mapping actually claims this file.
    if detected_format.is_none() {
        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
            || compiled_catalogs
                .iter()
                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
        if !has_match {
            return vec![FileResult::Skip];
        }
    }

    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
        let parser = parsers::parser_for(fmt);
        match parser.parse(&content, &path_str) {
            Ok(val) => (parser, val),
            Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
        }
    } else {
        // No recognized extension: try every supported format in turn.
        match try_parse_all(&content, &path_str) {
            Some((fmt, val)) => (parsers::parser_for(fmt), val),
            None => return vec![FileResult::Skip],
        }
    };

    // Empty / null documents have nothing to validate.
    if instance.is_null() {
        return vec![FileResult::Skip];
    }

    // Precedence: inline $schema > config mapping > catalog match.
    let inline_uri = parser.extract_schema_uri(&content, &instance);
    let from_inline = inline_uri.is_some();
    let schema_uri = inline_uri
        .or_else(|| {
            config
                .find_schema_mapping(&path_str, file_name)
                .map(str::to_string)
        })
        .or_else(|| {
            compiled_catalogs
                .iter()
                .find_map(|cat| cat.find_schema(&path_str, file_name))
                .map(str::to_string)
        });

    let Some(schema_uri) = schema_uri else {
        return vec![FileResult::Skip];
    };

    // Keep the pre-rewrite URI for format-validation config lookups.
    let original_schema_uri = schema_uri.clone();

    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

    // Inline references resolve relative to the file's own directory;
    // mapped references resolve relative to the config directory.
    let schema_uri = resolve_local_schema_path(
        &schema_uri,
        if from_inline {
            path.parent()
        } else {
            Some(config_dir)
        },
    );

    vec![FileResult::Parsed {
        schema_uri,
        parsed: ParsedFile {
            path: path_str,
            content,
            instance,
            original_schema_uri,
        },
    }]
}
353
/// Parse a JSONL file, resolving a schema per line.
///
/// Lines with an inconsistent `$schema` are reported as `SchemaMismatch`
/// errors; each remaining line becomes its own `Parsed` result addressed
/// by a `path:line` pseudo-path.
#[allow(clippy::too_many_arguments)]
fn process_jsonl_file(
    path: &Path,
    path_str: &str,
    file_name: &str,
    content: &str,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
        Ok(lines) => lines,
        Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
    };

    if lines.is_empty() {
        return vec![FileResult::Skip];
    }

    let mut results = Vec::with_capacity(lines.len());

    // All lines in one file are expected to declare the same $schema.
    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
        for m in mismatches {
            results.push(FileResult::Error(LintError::SchemaMismatch {
                path: path_str.to_string(),
                line_number: m.line_number,
                message: format!("expected consistent $schema but found {}", m.schema_uri),
            }));
        }
    }

    for line in lines {
        // Same precedence as whole files: inline > config > catalogs.
        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
        let from_inline = inline_uri.is_some();
        let schema_uri = inline_uri
            .or_else(|| {
                config
                    .find_schema_mapping(path_str, file_name)
                    .map(str::to_string)
            })
            .or_else(|| {
                compiled_catalogs
                    .iter()
                    .find_map(|cat| cat.find_schema(path_str, file_name))
                    .map(str::to_string)
            });

        // Lines with no resolvable schema are silently skipped.
        let Some(schema_uri) = schema_uri else {
            continue;
        };

        let original_schema_uri = schema_uri.clone();

        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

        // Inline references resolve relative to the file's directory.
        let schema_uri = resolve_local_schema_path(
            &schema_uri,
            if from_inline {
                path.parent()
            } else {
                Some(config_dir)
            },
        );

        // Diagnostics address individual lines as "path:line".
        let line_path = format!("{path_str}:{}", line.line_number);

        results.push(FileResult::Parsed {
            schema_uri,
            parsed: ParsedFile {
                path: line_path,
                content: line.raw,
                instance: line.value,
                original_schema_uri,
            },
        });
    }

    if results.is_empty() {
        vec![FileResult::Skip]
    } else {
        results
    }
}
449
/// Read all `files` concurrently, parse each, and group the parsed
/// results by resolved schema URI (a `BTreeMap` keeps the iteration
/// order deterministic). I/O and parse failures are appended to `errors`.
#[tracing::instrument(skip_all, fields(file_count = files.len()))]
#[allow(clippy::too_many_arguments)]
async fn parse_and_group_files(
    files: &[PathBuf],
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
    errors: &mut Vec<LintError>,
) -> BTreeMap<String, Vec<ParsedFile>> {
    // Bound concurrent opens so large runs don't exhaust file descriptors.
    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
    let mut read_set = tokio::task::JoinSet::new();
    for path in files {
        let path = path.clone();
        let sem = semaphore.clone();
        read_set.spawn(async move {
            let _permit = sem.acquire().await.expect("semaphore closed");
            let result = tokio::fs::read_to_string(&path).await;
            (path, result)
        });
    }

    let mut file_contents = Vec::with_capacity(files.len());
    while let Some(result) = read_set.join_next().await {
        match result {
            Ok(item) => file_contents.push(item),
            // A panicking read task is logged, not propagated.
            Err(e) => tracing::warn!("file read task panicked: {e}"),
        }
    }

    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
    for (path, content_result) in file_contents {
        let content = match content_result {
            Ok(c) => c,
            Err(e) => {
                errors.push(LintError::Io {
                    path: path.display().to_string(),
                    message: format!("failed to read: {e}"),
                });
                continue;
            }
        };
        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
        for result in results {
            match result {
                FileResult::Parsed { schema_uri, parsed } => {
                    schema_groups.entry(schema_uri).or_default().push(parsed);
                }
                FileResult::Error(e) => errors.push(e),
                FileResult::Skip => {}
            }
        }
    }

    schema_groups
}
510
/// Resolve the schema document for `schema_uri`.
///
/// Remote (`http`/`https`) schemas must already be present in
/// `prefetched`; local schema paths are read from disk and memoized in
/// `local_cache`. On any failure, a `SchemaFetch` error is recorded for
/// every file in `group` and `None` is returned.
#[allow(clippy::too_many_arguments)]
async fn fetch_schema_from_prefetched(
    schema_uri: &str,
    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
    local_cache: &mut HashMap<String, Value>,
    group: &[ParsedFile],
    errors: &mut Vec<LintError>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Option<(Value, Option<CacheStatus>)> {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");

    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
        match prefetched.get(schema_uri) {
            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
            // All remote URIs are prefetched up front, so this indicates a bug.
            None => Err(format!("schema not prefetched: {schema_uri}")),
        }
    } else if let Some(cached) = local_cache.get(schema_uri) {
        // Local schema already read and parsed earlier in this run.
        Ok((cached.clone(), None))
    } else {
        tokio::fs::read_to_string(schema_uri)
            .await
            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
            .and_then(|content| {
                serde_json::from_str::<Value>(&content)
                    .map(|v| {
                        // Memoize so later groups don't re-read the file.
                        local_cache.insert(schema_uri.to_string(), v.clone());
                        (v, None)
                    })
                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
            })
    };

    match result {
        Ok(value) => Some(value),
        Err(message) => {
            // Surface the fetch failure once per file in the group.
            report_group_error(
                |path| LintError::SchemaFetch {
                    path: path.to_string(),
                    message: message.clone(),
                },
                schema_uri,
                None,
                group,
                errors,
                checked,
                on_check,
            );
            None
        }
    }
}
572
573#[allow(clippy::too_many_arguments)]
575fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
576 make_error: impl Fn(&str) -> LintError,
577 schema_uri: &str,
578 cache_status: Option<CacheStatus>,
579 group: &[P],
580 errors: &mut Vec<LintError>,
581 checked: &mut Vec<CheckedFile>,
582 on_check: &mut impl FnMut(&CheckedFile),
583) {
584 for item in group {
585 let pf = item.borrow();
586 let cf = CheckedFile {
587 path: pf.path.clone(),
588 schema: schema_uri.to_string(),
589 cache_status,
590 validation_cache_status: None,
591 };
592 on_check(&cf);
593 checked.push(cf);
594 errors.push(make_error(&pf.path));
595 }
596}
597
598#[allow(clippy::too_many_arguments)]
600fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
601 schema_uri: &str,
602 cache_status: Option<CacheStatus>,
603 validation_cache_status: Option<ValidationCacheStatus>,
604 group: &[P],
605 checked: &mut Vec<CheckedFile>,
606 on_check: &mut impl FnMut(&CheckedFile),
607) {
608 for item in group {
609 let pf = item.borrow();
610 let cf = CheckedFile {
611 path: pf.path.clone(),
612 schema: schema_uri.to_string(),
613 cache_status,
614 validation_cache_status,
615 };
616 on_check(&cf);
617 checked.push(cf);
618 }
619}
620
/// Trim noisy "anyOf/oneOf" validator messages.
///
/// Such messages start with a serialized copy of the offending instance
/// (`<instance> is not valid under any of the schemas ...`); this drops
/// everything up to and including the leading " is " so the message
/// begins at "not valid ...".
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    // Length of the " is " prefix that is skipped along with the instance dump.
    const SKIP: usize = " is ".len();
    match msg.find(MARKER) {
        Some(pos) => msg[pos + SKIP..].to_string(),
        None => msg,
    }
}
637
638fn push_validation_errors(
640 pf: &ParsedFile,
641 schema_url: &str,
642 validation_errors: &[ValidationError],
643 errors: &mut Vec<LintError>,
644) {
645 for ve in validation_errors {
646 let span = find_instance_path_span(&pf.content, &ve.instance_path);
647 let instance_path = if ve.instance_path.is_empty() {
648 DEFAULT_LABEL.to_string()
649 } else {
650 ve.instance_path.clone()
651 };
652 let label = format_label(&instance_path, &ve.schema_path);
653 let source_span: miette::SourceSpan = span.into();
654 errors.push(LintError::Validation {
655 src: miette::NamedSource::new(&pf.path, pf.content.clone()),
656 span: source_span,
657 schema_span: source_span,
658 path: pf.path.clone(),
659 instance_path,
660 label,
661 message: ve.message.clone(),
662 schema_url: schema_url.to_string(),
663 schema_path: ve.schema_path.clone(),
664 });
665 }
666}
667
/// Validate every file in `group` against `validator`, storing each
/// outcome in the validation cache and recording errors/checked files.
/// Only validation-cache misses reach this function.
#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
#[allow(clippy::too_many_arguments)]
async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
    validator: &jsonschema::Validator,
    schema_uri: &str,
    schema_hash: &str,
    validate_formats: bool,
    cache_status: Option<CacheStatus>,
    group: &[P],
    vcache: &lintel_validation_cache::ValidationCache,
    errors: &mut Vec<LintError>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) {
    for item in group {
        let pf = item.borrow();
        // Collect ALL validation errors for the file, not just the first.
        let file_errors: Vec<ValidationError> = validator
            .iter_errors(&pf.instance)
            .map(|error| ValidationError {
                instance_path: error.instance_path().to_string(),
                message: clean_error_message(error.to_string()),
                schema_path: error.schema_path().to_string(),
            })
            .collect();

        // Store the outcome (even an empty error list) so a later run
        // with identical content and schema can skip validation.
        vcache
            .store(
                &lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash,
                    validate_formats,
                },
                &file_errors,
            )
            .await;
        push_validation_errors(pf, schema_uri, &file_errors, errors);

        let cf = CheckedFile {
            path: pf.path.clone(),
            schema: schema_uri.to_string(),
            cache_status,
            // By construction these files missed the validation cache.
            validation_cache_status: Some(ValidationCacheStatus::Miss),
        };
        on_check(&cf);
        checked.push(cf);
    }
}
717
/// Fetch and compile all schema catalogs concurrently: user-configured
/// registries first, then (unless disabled) the default lintel registry,
/// then the SchemaStore catalog. The returned order follows that
/// priority regardless of which fetch completes first. Fetch failures
/// are warnings, not errors.
pub async fn fetch_compiled_catalogs(
    retriever: &SchemaCache,
    config: &lintel_config::Config,
    no_catalog: bool,
) -> Vec<CompiledCatalog> {
    let mut compiled_catalogs = Vec::new();

    if !no_catalog {
        let catalog_span = tracing::info_span!("fetch_catalogs").entered();

        // (priority index, human-readable label, fetch outcome)
        #[allow(clippy::items_after_statements)]
        type CatalogResult = (
            usize,
            String,
            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
        );
        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();

        for (i, registry_url) in config.registries.iter().enumerate() {
            let r = retriever.clone();
            let url = registry_url.clone();
            let label = format!("registry {url}");
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, &url)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (i, label, result)
            });
        }

        // The default registry sorts after all user-configured ones.
        let lintel_order = config.registries.len();
        if !config.no_default_catalog {
            let r = retriever.clone();
            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (lintel_order, label, result)
            });
        }

        // SchemaStore is always fetched and always sorts last.
        let schemastore_order = config.registries.len() + 1;
        let r = retriever.clone();
        catalog_tasks.spawn(async move {
            let result = catalog::fetch_catalog(&r)
                .await
                .map(|cat| CompiledCatalog::compile(&cat));
            (schemastore_order, "SchemaStore catalog".to_string(), result)
        });

        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
        while let Some(result) = catalog_tasks.join_next().await {
            match result {
                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
                // Catalog failures are non-fatal: warn and continue.
                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
            }
        }
        // Restore deterministic priority order after concurrent completion.
        results.sort_by_key(|(order, _)| *order);
        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));

        drop(catalog_span);
    }

    compiled_catalogs
}
798
/// Run validation with a default schema cache and no per-file callback.
/// See [`run_with`] for the full-featured entry point.
pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
    run_with(args, None, |_| {}).await
}
805
/// Run the full validation pipeline.
///
/// Phases: build (or accept) a schema cache, load config, collect files,
/// fetch catalogs, parse and group files by schema, prefetch all remote
/// schemas concurrently, then per schema group: consult the validation
/// cache, compile the schema, and validate the cache misses.
///
/// `cache` lets callers (and tests) inject a prebuilt `SchemaCache`;
/// when it is `Some`, `args.cache_dir`, `force_schema_fetch`, and
/// `schema_cache_ttl` are not consulted. `on_check` is invoked once per
/// checked file as results become available.
#[tracing::instrument(skip_all, name = "validate")]
#[allow(clippy::too_many_lines)]
pub async fn run_with(
    args: &ValidateArgs,
    cache: Option<SchemaCache>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<ValidateResult> {
    let retriever = if let Some(c) = cache {
        c
    } else {
        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
        if let Some(dir) = &args.cache_dir {
            let path = PathBuf::from(dir);
            // Best effort: a failed create falls through to the builder.
            let _ = fs::create_dir_all(&path);
            builder = builder.cache_dir(path);
        }
        if let Some(ttl) = args.schema_cache_ttl {
            builder = builder.ttl(ttl);
        }
        builder.build()
    };

    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    let files = collect_files(&args.globs, &args.exclude)?;
    tracing::info!(file_count = files.len(), "collected files");

    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;

    let mut errors: Vec<LintError> = Vec::new();
    let mut checked: Vec<CheckedFile> = Vec::new();

    // Phase 1: read + parse everything, grouped by resolved schema URI.
    let schema_groups = parse_and_group_files(
        &files,
        &config,
        &config_dir,
        &compiled_catalogs,
        &mut errors,
    )
    .await;
    tracing::info!(
        schema_count = schema_groups.len(),
        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
        "grouped files by schema"
    );

    let vcache = lintel_validation_cache::ValidationCache::new(
        lintel_validation_cache::ensure_cache_dir(),
        args.force_validation,
    );

    // Prefetch all remote schemas concurrently before the per-group loop.
    let remote_uris: Vec<&String> = schema_groups
        .keys()
        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
        .collect();

    let prefetched = {
        let _prefetch_span =
            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();

        let mut schema_tasks = tokio::task::JoinSet::new();
        for uri in remote_uris {
            let r = retriever.clone();
            let u = uri.clone();
            schema_tasks.spawn(async move {
                let result = r.fetch(&u).await;
                (u, result)
            });
        }

        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
        while let Some(result) = schema_tasks.join_next().await {
            match result {
                Ok((uri, fetch_result)) => {
                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
                }
                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
            }
        }

        prefetched
    };

    // Local (on-disk) schemas are read lazily and memoized here.
    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
    // Per-phase wall-clock accumulators, reported at the end.
    let mut fetch_time = core::time::Duration::ZERO;
    let mut hash_time = core::time::Duration::ZERO;
    let mut vcache_time = core::time::Duration::ZERO;
    let mut compile_time = core::time::Duration::ZERO;
    let mut validate_time = core::time::Duration::ZERO;

    // Phase 2: validate each schema group.
    for (schema_uri, group) in &schema_groups {
        let _group_span = tracing::debug_span!(
            "schema_group",
            schema = schema_uri.as_str(),
            files = group.len(),
        )
        .entered();

        // Formats are validated only if the config allows it for every
        // file in the group (checked against both pre- and post-rewrite URIs).
        let validate_formats = group.iter().all(|pf| {
            config
                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
        });

        let t = std::time::Instant::now();
        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
            schema_uri,
            &prefetched,
            &mut local_schema_cache,
            group,
            &mut errors,
            &mut checked,
            &mut on_check,
        )
        .await
        else {
            // Fetch failed; errors were already recorded for the group.
            fetch_time += t.elapsed();
            continue;
        };
        fetch_time += t.elapsed();

        let t = std::time::Instant::now();
        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
        hash_time += t.elapsed();

        let mut cache_misses: Vec<&ParsedFile> = Vec::new();

        // Consult the validation cache; only misses get re-validated.
        let t = std::time::Instant::now();
        for pf in group {
            let (cached, vcache_status) = vcache
                .lookup(&lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash: &schema_hash,
                    validate_formats,
                })
                .await;

            if let Some(cached_errors) = cached {
                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
                let cf = CheckedFile {
                    path: pf.path.clone(),
                    schema: schema_uri.clone(),
                    cache_status,
                    validation_cache_status: Some(vcache_status),
                };
                on_check(&cf);
                checked.push(cf);
            } else {
                cache_misses.push(pf);
            }
        }
        vcache_time += t.elapsed();

        tracing::debug!(
            cache_hits = group.len() - cache_misses.len(),
            cache_misses = cache_misses.len(),
            "validation cache"
        );

        if cache_misses.is_empty() {
            continue;
        }

        // Compile the schema (only needed when there are cache misses).
        let t = std::time::Instant::now();
        let validator = {
            let is_remote_schema =
                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
            let local_retriever = LocalRetriever {
                http: retriever.clone(),
            };
            let opts = jsonschema::async_options()
                .with_retriever(local_retriever)
                .should_validate_formats(validate_formats);
            // Base URI so relative $ref inside the schema resolve; for
            // remote schemas any fragment is stripped first.
            let base_uri = if is_remote_schema {
                let uri = match schema_uri.find('#') {
                    Some(pos) => schema_uri[..pos].to_string(),
                    None => schema_uri.clone(),
                };
                Some(uri)
            } else {
                std::fs::canonicalize(schema_uri)
                    .ok()
                    .map(|p| format!("file://{}", p.display()))
            };
            let opts = if let Some(uri) = base_uri {
                opts.with_base_uri(uri)
            } else {
                opts
            };
            match opts.build(&schema_value).await {
                Ok(v) => v,
                Err(e) => {
                    compile_time += t.elapsed();
                    // A "uri-reference" compile failure with formats off is
                    // tolerated: mark the files checked and move on.
                    if !validate_formats && e.to_string().contains("uri-reference") {
                        mark_group_checked(
                            schema_uri,
                            cache_status,
                            Some(ValidationCacheStatus::Miss),
                            &cache_misses,
                            &mut checked,
                            &mut on_check,
                        );
                        continue;
                    }
                    let msg = format!("failed to compile schema: {e}");
                    report_group_error(
                        |path| LintError::SchemaCompile {
                            path: path.to_string(),
                            message: msg.clone(),
                        },
                        schema_uri,
                        cache_status,
                        &cache_misses,
                        &mut errors,
                        &mut checked,
                        &mut on_check,
                    );
                    continue;
                }
            }
        };
        compile_time += t.elapsed();

        let t = std::time::Instant::now();
        validate_group(
            &validator,
            schema_uri,
            &schema_hash,
            validate_formats,
            cache_status,
            &cache_misses,
            &vcache,
            &mut errors,
            &mut checked,
            &mut on_check,
        )
        .await;
        validate_time += t.elapsed();
    }

    #[allow(clippy::cast_possible_truncation)]
    {
        tracing::info!(
            fetch_ms = fetch_time.as_millis() as u64,
            hash_ms = hash_time.as_millis() as u64,
            vcache_ms = vcache_time.as_millis() as u64,
            compile_ms = compile_time.as_millis() as u64,
            validate_ms = validate_time.as_millis() as u64,
            "phase2 breakdown"
        );
    }

    // Deterministic output order: by path, then by byte offset.
    errors.sort_by(|a, b| {
        a.path()
            .cmp(b.path())
            .then_with(|| a.offset().cmp(&b.offset()))
    });

    Ok(ValidateResult { errors, checked })
}
1090
1091#[cfg(test)]
1092mod tests {
1093 use super::*;
1094 use lintel_schema_cache::SchemaCache;
1095 use std::path::Path;
1096
1097 fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1098 let cache = SchemaCache::memory();
1099 for (uri, body) in entries {
1100 cache.insert(
1101 uri,
1102 serde_json::from_str(body).expect("test mock: invalid JSON"),
1103 );
1104 }
1105 cache
1106 }
1107
    /// Path to this crate's `testdata` fixtures directory.
    fn testdata() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
    }
1111
1112 fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1114 dirs.iter()
1115 .flat_map(|dir| {
1116 let base = testdata().join(dir);
1117 vec![
1118 base.join("*.json").to_string_lossy().to_string(),
1119 base.join("*.yaml").to_string_lossy().to_string(),
1120 base.join("*.yml").to_string_lossy().to_string(),
1121 base.join("*.json5").to_string_lossy().to_string(),
1122 base.join("*.jsonc").to_string_lossy().to_string(),
1123 base.join("*.toml").to_string_lossy().to_string(),
1124 ]
1125 })
1126 .collect()
1127 }
1128
    /// Standard test arguments for the given testdata dirs: both caches
    /// forced off and catalog fetching disabled.
    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
        ValidateArgs {
            globs: scenario_globs(dirs),
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        }
    }
1141
    // Minimal object schema requiring a string `name` property.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1144
    /// Cache preloaded with `SCHEMA` under the canonical test URI.
    fn schema_mock() -> SchemaCache {
        mock(&[("https://example.com/schema.json", SCHEMA)])
    }
1148
    // Globs that match nothing should succeed with zero errors.
    #[tokio::test]
    async fn no_matching_files() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
        let c = ValidateArgs {
            globs: vec![pattern],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1169
    // Every fixture in positive_tests conforms to SCHEMA.
    #[tokio::test]
    async fn dir_all_valid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1177
    // Every fixture in negative_tests violates SCHEMA.
    #[tokio::test]
    async fn dir_all_invalid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["negative_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1185
    // A mix of valid and invalid fixtures still reports errors.
    #[tokio::test]
    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1193
    // Files with no resolvable schema are skipped, not errored.
    #[tokio::test]
    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
        let c = args_for_dirs(&["no_schema"]);
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1201
    // Schema-less files alongside valid ones don't produce errors.
    #[tokio::test]
    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
        let c = args_for_dirs(&["positive_tests", "no_schema"]);
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1209
    // Passing a bare directory (not a glob) triggers file discovery.
    #[tokio::test]
    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
        let dir = testdata().join("positive_tests");
        let c = ValidateArgs {
            globs: vec![dir.to_string_lossy().to_string()],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        assert!(result.files_checked() > 0);
        Ok(())
    }
1230
    // Several directory arguments are all discovered and validated.
    #[tokio::test]
    async fn multiple_directory_args() -> anyhow::Result<()> {
        let pos_dir = testdata().join("positive_tests");
        let no_schema_dir = testdata().join("no_schema");
        let c = ValidateArgs {
            globs: vec![
                pos_dir.to_string_lossy().to_string(),
                no_schema_dir.to_string_lossy().to_string(),
            ],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1252
    // Directory and glob arguments can be combined in one invocation.
    #[tokio::test]
    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
        let dir = testdata().join("positive_tests");
        let glob_pattern = testdata()
            .join("no_schema")
            .join("*.json")
            .to_string_lossy()
            .to_string();
        let c = ValidateArgs {
            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
        assert!(!result.has_errors());
        Ok(())
    }
1275
    // A syntactically broken JSON fixture surfaces as a parse error.
    #[tokio::test]
    async fn malformed_json_parse_error() -> anyhow::Result<()> {
        let base = testdata().join("malformed");
        let c = ValidateArgs {
            globs: vec![base.join("*.json").to_string_lossy().to_string()],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1293
    // A syntactically broken YAML fixture surfaces as a parse error.
    #[tokio::test]
    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
        let base = testdata().join("malformed");
        let c = ValidateArgs {
            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
            exclude: vec![],
            cache_dir: None,
            force_schema_fetch: true,
            force_validation: true,
            no_catalog: true,
            config_dir: None,
            schema_cache_ttl: None,
        };
        let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
        assert!(result.has_errors());
        Ok(())
    }
1311
1312 #[tokio::test]
1315 async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1316 let base = testdata().join("negative_tests");
1317 let c = ValidateArgs {
1318 globs: scenario_globs(&["positive_tests", "negative_tests"]),
1319 exclude: vec![
1320 base.join("missing_name.json").to_string_lossy().to_string(),
1321 base.join("missing_name.toml").to_string_lossy().to_string(),
1322 base.join("missing_name.yaml").to_string_lossy().to_string(),
1323 ],
1324 cache_dir: None,
1325 force_schema_fetch: true,
1326 force_validation: true,
1327 no_catalog: true,
1328 config_dir: None,
1329 schema_cache_ttl: None,
1330 };
1331 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1332 assert!(!result.has_errors());
1333 Ok(())
1334 }
1335
1336 #[tokio::test]
1339 async fn custom_cache_dir() -> anyhow::Result<()> {
1340 let c = ValidateArgs {
1341 globs: scenario_globs(&["positive_tests"]),
1342 exclude: vec![],
1343 cache_dir: None,
1344 force_schema_fetch: true,
1345 force_validation: true,
1346 no_catalog: true,
1347 config_dir: None,
1348 schema_cache_ttl: None,
1349 };
1350 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1351 assert!(!result.has_errors());
1352 Ok(())
1353 }
1354
1355 #[tokio::test]
1358 async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1359 let tmp = tempfile::tempdir()?;
1360 let schema_path = tmp.path().join("schema.json");
1361 fs::write(&schema_path, SCHEMA)?;
1362
1363 let f = tmp.path().join("valid.json");
1364 fs::write(
1365 &f,
1366 format!(
1367 r#"{{"$schema":"{}","name":"hello"}}"#,
1368 schema_path.to_string_lossy()
1369 ),
1370 )?;
1371
1372 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1373 let c = ValidateArgs {
1374 globs: vec![pattern],
1375 exclude: vec![],
1376 cache_dir: None,
1377 force_schema_fetch: true,
1378 force_validation: true,
1379 no_catalog: true,
1380 config_dir: None,
1381 schema_cache_ttl: None,
1382 };
1383 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1384 assert!(!result.has_errors());
1385 Ok(())
1386 }
1387
1388 #[tokio::test]
1389 async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1390 let tmp = tempfile::tempdir()?;
1391 let schema_path = tmp.path().join("schema.json");
1392 fs::write(&schema_path, SCHEMA)?;
1393
1394 let f = tmp.path().join("valid.yaml");
1395 fs::write(
1396 &f,
1397 format!(
1398 "# yaml-language-server: $schema={}\nname: hello\n",
1399 schema_path.to_string_lossy()
1400 ),
1401 )?;
1402
1403 let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1404 let c = ValidateArgs {
1405 globs: vec![pattern],
1406 exclude: vec![],
1407 cache_dir: None,
1408 force_schema_fetch: true,
1409 force_validation: true,
1410 no_catalog: true,
1411 config_dir: None,
1412 schema_cache_ttl: None,
1413 };
1414 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1415 assert!(!result.has_errors());
1416 Ok(())
1417 }
1418
1419 #[tokio::test]
1420 async fn missing_local_schema_errors() -> anyhow::Result<()> {
1421 let tmp = tempfile::tempdir()?;
1422 let f = tmp.path().join("ref.json");
1423 fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1424
1425 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1426 let c = ValidateArgs {
1427 globs: vec![pattern],
1428 exclude: vec![],
1429 cache_dir: None,
1430 force_schema_fetch: true,
1431 force_validation: true,
1432 no_catalog: true,
1433 config_dir: None,
1434 schema_cache_ttl: None,
1435 };
1436 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1437 assert!(result.has_errors());
1438 Ok(())
1439 }
1440
1441 #[tokio::test]
1444 async fn json5_valid_with_schema() -> anyhow::Result<()> {
1445 let tmp = tempfile::tempdir()?;
1446 let schema_path = tmp.path().join("schema.json");
1447 fs::write(&schema_path, SCHEMA)?;
1448
1449 let f = tmp.path().join("config.json5");
1450 fs::write(
1451 &f,
1452 format!(
1453 r#"{{
1454 // JSON5 comment
1455 "$schema": "{}",
1456 name: "hello",
1457}}"#,
1458 schema_path.to_string_lossy()
1459 ),
1460 )?;
1461
1462 let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1463 let c = ValidateArgs {
1464 globs: vec![pattern],
1465 exclude: vec![],
1466 cache_dir: None,
1467 force_schema_fetch: true,
1468 force_validation: true,
1469 no_catalog: true,
1470 config_dir: None,
1471 schema_cache_ttl: None,
1472 };
1473 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1474 assert!(!result.has_errors());
1475 Ok(())
1476 }
1477
1478 #[tokio::test]
1479 async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1480 let tmp = tempfile::tempdir()?;
1481 let schema_path = tmp.path().join("schema.json");
1482 fs::write(&schema_path, SCHEMA)?;
1483
1484 let f = tmp.path().join("config.jsonc");
1485 fs::write(
1486 &f,
1487 format!(
1488 r#"{{
1489 /* JSONC comment */
1490 "$schema": "{}",
1491 "name": "hello"
1492}}"#,
1493 schema_path.to_string_lossy()
1494 ),
1495 )?;
1496
1497 let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1498 let c = ValidateArgs {
1499 globs: vec![pattern],
1500 exclude: vec![],
1501 cache_dir: None,
1502 force_schema_fetch: true,
1503 force_validation: true,
1504 no_catalog: true,
1505 config_dir: None,
1506 schema_cache_ttl: None,
1507 };
1508 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1509 assert!(!result.has_errors());
1510 Ok(())
1511 }
1512
    // Trimmed-down stand-in for the SchemaStore GitHub workflow schema: just
    // enough structure (required "on" and "jobs" keys) for the catalog tests
    // to tell valid workflow files from invalid ones.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1524
1525 fn gh_catalog_json() -> String {
1526 r#"{"version":1,"schemas":[{
1527 "name": "GitHub Workflow",
1528 "description": "GitHub Actions workflow",
1529 "url": "https://www.schemastore.org/github-workflow.json",
1530 "fileMatch": [
1531 "**/.github/workflows/*.yml",
1532 "**/.github/workflows/*.yaml"
1533 ]
1534 }]}"#
1535 .to_string()
1536 }
1537
1538 #[tokio::test]
1539 async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1540 let tmp = tempfile::tempdir()?;
1541 let cache_tmp = tempfile::tempdir()?;
1542 let wf_dir = tmp.path().join(".github/workflows");
1543 fs::create_dir_all(&wf_dir)?;
1544 fs::write(
1545 wf_dir.join("ci.yml"),
1546 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1547 )?;
1548
1549 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1550 let client = mock(&[
1551 (
1552 "https://www.schemastore.org/api/json/catalog.json",
1553 &gh_catalog_json(),
1554 ),
1555 (
1556 "https://www.schemastore.org/github-workflow.json",
1557 GH_WORKFLOW_SCHEMA,
1558 ),
1559 ]);
1560 let c = ValidateArgs {
1561 globs: vec![pattern],
1562 exclude: vec![],
1563 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1564 force_schema_fetch: true,
1565 force_validation: true,
1566 no_catalog: false,
1567 config_dir: None,
1568 schema_cache_ttl: None,
1569 };
1570 let result = run_with(&c, Some(client), |_| {}).await?;
1571 assert!(!result.has_errors());
1572 Ok(())
1573 }
1574
1575 #[tokio::test]
1576 async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1577 let tmp = tempfile::tempdir()?;
1578 let cache_tmp = tempfile::tempdir()?;
1579 let wf_dir = tmp.path().join(".github/workflows");
1580 fs::create_dir_all(&wf_dir)?;
1581 fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1582
1583 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1584 let client = mock(&[
1585 (
1586 "https://www.schemastore.org/api/json/catalog.json",
1587 &gh_catalog_json(),
1588 ),
1589 (
1590 "https://www.schemastore.org/github-workflow.json",
1591 GH_WORKFLOW_SCHEMA,
1592 ),
1593 ]);
1594 let c = ValidateArgs {
1595 globs: vec![pattern],
1596 exclude: vec![],
1597 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1598 force_schema_fetch: true,
1599 force_validation: true,
1600 no_catalog: false,
1601 config_dir: None,
1602 schema_cache_ttl: None,
1603 };
1604 let result = run_with(&c, Some(client), |_| {}).await?;
1605 assert!(result.has_errors());
1606 Ok(())
1607 }
1608
1609 #[tokio::test]
1610 async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1611 let tmp = tempfile::tempdir()?;
1612 let cache_tmp = tempfile::tempdir()?;
1613 let wf_dir = tmp.path().join(".github/workflows");
1614 fs::create_dir_all(&wf_dir)?;
1615 fs::write(
1616 wf_dir.join("ci.yml"),
1617 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1618 )?;
1619
1620 let client = mock(&[
1621 (
1622 "https://www.schemastore.org/api/json/catalog.json",
1623 &gh_catalog_json(),
1624 ),
1625 (
1626 "https://www.schemastore.org/github-workflow.json",
1627 GH_WORKFLOW_SCHEMA,
1628 ),
1629 ]);
1630 let c = ValidateArgs {
1631 globs: vec![],
1632 exclude: vec![],
1633 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1634 force_schema_fetch: true,
1635 force_validation: true,
1636 no_catalog: false,
1637 config_dir: None,
1638 schema_cache_ttl: None,
1639 };
1640
1641 let orig_dir = std::env::current_dir()?;
1642 std::env::set_current_dir(tmp.path())?;
1643 let result = run_with(&c, Some(client), |_| {}).await?;
1644 std::env::set_current_dir(orig_dir)?;
1645
1646 assert!(!result.has_errors());
1647 Ok(())
1648 }
1649
1650 #[tokio::test]
1653 async fn toml_valid_with_schema() -> anyhow::Result<()> {
1654 let tmp = tempfile::tempdir()?;
1655 let schema_path = tmp.path().join("schema.json");
1656 fs::write(&schema_path, SCHEMA)?;
1657
1658 let f = tmp.path().join("config.toml");
1659 fs::write(
1660 &f,
1661 format!(
1662 "# :schema {}\nname = \"hello\"\n",
1663 schema_path.to_string_lossy()
1664 ),
1665 )?;
1666
1667 let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1668 let c = ValidateArgs {
1669 globs: vec![pattern],
1670 exclude: vec![],
1671 cache_dir: None,
1672 force_schema_fetch: true,
1673 force_validation: true,
1674 no_catalog: true,
1675 config_dir: None,
1676 schema_cache_ttl: None,
1677 };
1678 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1679 assert!(!result.has_errors());
1680 Ok(())
1681 }
1682
1683 #[tokio::test]
1686 async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1687 let tmp = tempfile::tempdir()?;
1688
1689 let schemas_dir = tmp.path().join("schemas");
1690 fs::create_dir_all(&schemas_dir)?;
1691 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1692
1693 fs::write(
1694 tmp.path().join("lintel.toml"),
1695 r#"
1696[rewrite]
1697"http://localhost:9000/" = "//schemas/"
1698"#,
1699 )?;
1700
1701 let f = tmp.path().join("config.json");
1702 fs::write(
1703 &f,
1704 r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1705 )?;
1706
1707 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1708 let c = ValidateArgs {
1709 globs: vec![pattern],
1710 exclude: vec![],
1711 cache_dir: None,
1712 force_schema_fetch: true,
1713 force_validation: true,
1714 no_catalog: true,
1715 config_dir: Some(tmp.path().to_path_buf()),
1716 schema_cache_ttl: None,
1717 };
1718
1719 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1720 assert!(!result.has_errors());
1721 assert_eq!(result.files_checked(), 1);
1722 Ok(())
1723 }
1724
1725 #[tokio::test]
1726 async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1727 let tmp = tempfile::tempdir()?;
1728
1729 let schemas_dir = tmp.path().join("schemas");
1730 fs::create_dir_all(&schemas_dir)?;
1731 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1732
1733 fs::write(tmp.path().join("lintel.toml"), "")?;
1734
1735 let sub = tmp.path().join("deeply/nested");
1736 fs::create_dir_all(&sub)?;
1737 let f = sub.join("config.json");
1738 fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1739
1740 let pattern = sub.join("*.json").to_string_lossy().to_string();
1741 let c = ValidateArgs {
1742 globs: vec![pattern],
1743 exclude: vec![],
1744 cache_dir: None,
1745 force_schema_fetch: true,
1746 force_validation: true,
1747 no_catalog: true,
1748 config_dir: Some(tmp.path().to_path_buf()),
1749 schema_cache_ttl: None,
1750 };
1751
1752 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1753 assert!(!result.has_errors());
1754 Ok(())
1755 }
1756
    // Schema with a single property constrained by `format: "uri-reference"`,
    // used to check whether format assertions are enforced or suppressed by
    // per-file overrides.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1765
1766 #[tokio::test]
1767 async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1768 let tmp = tempfile::tempdir()?;
1769 let schema_path = tmp.path().join("schema.json");
1770 fs::write(&schema_path, FORMAT_SCHEMA)?;
1771
1772 let f = tmp.path().join("data.json");
1773 fs::write(
1774 &f,
1775 format!(
1776 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1777 schema_path.to_string_lossy()
1778 ),
1779 )?;
1780
1781 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1782 let c = ValidateArgs {
1783 globs: vec![pattern],
1784 exclude: vec![],
1785 cache_dir: None,
1786 force_schema_fetch: true,
1787 force_validation: true,
1788 no_catalog: true,
1789 config_dir: Some(tmp.path().to_path_buf()),
1790 schema_cache_ttl: None,
1791 };
1792 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1793 assert!(
1794 result.has_errors(),
1795 "expected format error without override"
1796 );
1797 Ok(())
1798 }
1799
1800 #[tokio::test]
1801 async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1802 let tmp = tempfile::tempdir()?;
1803 let schema_path = tmp.path().join("schema.json");
1804 fs::write(&schema_path, FORMAT_SCHEMA)?;
1805
1806 let f = tmp.path().join("data.json");
1807 fs::write(
1808 &f,
1809 format!(
1810 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1811 schema_path.to_string_lossy()
1812 ),
1813 )?;
1814
1815 fs::write(
1817 tmp.path().join("lintel.toml"),
1818 r#"
1819[[override]]
1820files = ["**/data.json"]
1821validate_formats = false
1822"#,
1823 )?;
1824
1825 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1826 let c = ValidateArgs {
1827 globs: vec![pattern],
1828 exclude: vec![],
1829 cache_dir: None,
1830 force_schema_fetch: true,
1831 force_validation: true,
1832 no_catalog: true,
1833 config_dir: Some(tmp.path().to_path_buf()),
1834 schema_cache_ttl: None,
1835 };
1836 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1837 assert!(
1838 !result.has_errors(),
1839 "expected no errors with validate_formats = false override"
1840 );
1841 Ok(())
1842 }
1843
1844 #[tokio::test]
1847 async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1848 let tmp = tempfile::tempdir()?;
1849 fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1850
1851 let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1852 let c = ValidateArgs {
1853 globs: vec![pattern],
1854 exclude: vec![],
1855 cache_dir: None,
1856 force_schema_fetch: true,
1857 force_validation: true,
1858 no_catalog: true,
1859 config_dir: Some(tmp.path().to_path_buf()),
1860 schema_cache_ttl: None,
1861 };
1862 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1863 assert!(!result.has_errors());
1864 assert_eq!(result.files_checked(), 0);
1865 Ok(())
1866 }
1867
1868 #[tokio::test]
1869 async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1870 let tmp = tempfile::tempdir()?;
1871 let cache_tmp = tempfile::tempdir()?;
1872 fs::write(
1874 tmp.path().join("myapp.cfg"),
1875 r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1876 )?;
1877
1878 let catalog_json = r#"{"version":1,"schemas":[{
1879 "name": "MyApp Config",
1880 "description": "MyApp configuration",
1881 "url": "https://example.com/myapp.schema.json",
1882 "fileMatch": ["*.cfg"]
1883 }]}"#;
1884 let schema =
1885 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1886
1887 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1888 let client = mock(&[
1889 (
1890 "https://www.schemastore.org/api/json/catalog.json",
1891 catalog_json,
1892 ),
1893 ("https://example.com/myapp.schema.json", schema),
1894 ]);
1895 let c = ValidateArgs {
1896 globs: vec![pattern],
1897 exclude: vec![],
1898 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1899 force_schema_fetch: true,
1900 force_validation: true,
1901 no_catalog: false,
1902 config_dir: Some(tmp.path().to_path_buf()),
1903 schema_cache_ttl: None,
1904 };
1905 let result = run_with(&c, Some(client), |_| {}).await?;
1906 assert!(!result.has_errors());
1907 assert_eq!(result.files_checked(), 1);
1908 Ok(())
1909 }
1910
1911 #[tokio::test]
1912 async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1913 let tmp = tempfile::tempdir()?;
1914 let cache_tmp = tempfile::tempdir()?;
1915 fs::write(
1917 tmp.path().join("myapp.cfg"),
1918 "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1919 )?;
1920
1921 let catalog_json = r#"{"version":1,"schemas":[{
1922 "name": "MyApp Config",
1923 "description": "MyApp configuration",
1924 "url": "https://example.com/myapp.schema.json",
1925 "fileMatch": ["*.cfg"]
1926 }]}"#;
1927
1928 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1929 let client = mock(&[(
1930 "https://www.schemastore.org/api/json/catalog.json",
1931 catalog_json,
1932 )]);
1933 let c = ValidateArgs {
1934 globs: vec![pattern],
1935 exclude: vec![],
1936 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1937 force_schema_fetch: true,
1938 force_validation: true,
1939 no_catalog: false,
1940 config_dir: Some(tmp.path().to_path_buf()),
1941 schema_cache_ttl: None,
1942 };
1943 let result = run_with(&c, Some(client), |_| {}).await?;
1944 assert!(!result.has_errors());
1945 assert_eq!(result.files_checked(), 0);
1946 Ok(())
1947 }
1948
1949 #[tokio::test]
1950 async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1951 let tmp = tempfile::tempdir()?;
1952 let cache_tmp = tempfile::tempdir()?;
1953 fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1955
1956 let catalog_json = r#"{"version":1,"schemas":[{
1957 "name": "MyApp Config",
1958 "description": "MyApp configuration",
1959 "url": "https://example.com/myapp.schema.json",
1960 "fileMatch": ["*.cfg"]
1961 }]}"#;
1962 let schema =
1963 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1964
1965 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1966 let client = mock(&[
1967 (
1968 "https://www.schemastore.org/api/json/catalog.json",
1969 catalog_json,
1970 ),
1971 ("https://example.com/myapp.schema.json", schema),
1972 ]);
1973 let c = ValidateArgs {
1974 globs: vec![pattern],
1975 exclude: vec![],
1976 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1977 force_schema_fetch: true,
1978 force_validation: true,
1979 no_catalog: false,
1980 config_dir: Some(tmp.path().to_path_buf()),
1981 schema_cache_ttl: None,
1982 };
1983 let result = run_with(&c, Some(client), |_| {}).await?;
1984 assert!(result.has_errors());
1985 assert_eq!(result.files_checked(), 1);
1986 Ok(())
1987 }
1988
1989 #[tokio::test]
1992 async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1993 let tmp = tempfile::tempdir()?;
1994 let schema_path = tmp.path().join("schema.json");
1995 fs::write(&schema_path, SCHEMA)?;
1996
1997 let f = tmp.path().join("valid.json");
1998 fs::write(
1999 &f,
2000 format!(
2001 r#"{{"$schema":"{}","name":"hello"}}"#,
2002 schema_path.to_string_lossy()
2003 ),
2004 )?;
2005
2006 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2007
2008 let c = ValidateArgs {
2010 globs: vec![pattern.clone()],
2011 exclude: vec![],
2012 cache_dir: None,
2013 force_schema_fetch: true,
2014 force_validation: false,
2015 no_catalog: true,
2016 config_dir: None,
2017 schema_cache_ttl: None,
2018 };
2019 let mut first_statuses = Vec::new();
2020 let result = run_with(&c, Some(mock(&[])), |cf| {
2021 first_statuses.push(cf.validation_cache_status);
2022 })
2023 .await?;
2024 assert!(!result.has_errors());
2025 assert!(result.files_checked() > 0);
2026
2027 assert!(
2029 first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2030 "expected at least one validation cache miss on first run"
2031 );
2032
2033 let mut second_statuses = Vec::new();
2035 let result = run_with(&c, Some(mock(&[])), |cf| {
2036 second_statuses.push(cf.validation_cache_status);
2037 })
2038 .await?;
2039 assert!(!result.has_errors());
2040
2041 assert!(
2043 second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2044 "expected at least one validation cache hit on second run"
2045 );
2046 Ok(())
2047 }
2048
2049 #[test]
2052 fn clean_strips_anyof_value() {
2053 let msg =
2054 r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2055 assert_eq!(
2056 clean_error_message(msg.to_string()),
2057 "not valid under any of the schemas listed in the 'anyOf' keyword"
2058 );
2059 }
2060
2061 #[test]
2062 fn clean_strips_oneof_value() {
2063 let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2064 assert_eq!(
2065 clean_error_message(msg.to_string()),
2066 "not valid under any of the schemas listed in the 'oneOf' keyword"
2067 );
2068 }
2069
2070 #[test]
2071 fn clean_strips_long_value() {
2072 let long_value = "x".repeat(5000);
2073 let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2074 let msg = format!("{long_value}{suffix}");
2075 assert_eq!(
2076 clean_error_message(msg),
2077 "not valid under any of the schemas listed in the 'anyOf' keyword"
2078 );
2079 }
2080
2081 #[test]
2082 fn clean_preserves_type_error() {
2083 let msg = r#"12345 is not of types "null", "string""#;
2084 assert_eq!(clean_error_message(msg.to_string()), msg);
2085 }
2086
2087 #[test]
2088 fn clean_preserves_required_property() {
2089 let msg = "\"name\" is a required property";
2090 assert_eq!(clean_error_message(msg.to_string()), msg);
2091 }
2092
2093 #[tokio::test]
2097 async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2098 let tmp = tempfile::tempdir()?;
2099
2100 let schema_body = r#"{
2102 "$schema": "http://json-schema.org/draft-07/schema#",
2103 "type": "object",
2104 "properties": { "name": { "type": "string" } },
2105 "required": ["name"]
2106 }"#;
2107
2108 let schema_url = "http://json-schema.org/draft-07/schema#";
2109
2110 let f = tmp.path().join("data.json");
2111 fs::write(
2112 &f,
2113 format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2114 )?;
2115
2116 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2117 let client = mock(&[(
2118 schema_url,
2120 schema_body,
2121 )]);
2122 let c = ValidateArgs {
2123 globs: vec![pattern],
2124 exclude: vec![],
2125 cache_dir: None,
2126 force_schema_fetch: true,
2127 force_validation: true,
2128 no_catalog: true,
2129 config_dir: None,
2130 schema_cache_ttl: None,
2131 };
2132 let result = run_with(&c, Some(client), |_| {}).await?;
2133 assert!(
2134 !result.has_errors(),
2135 "schema URI with fragment should not cause compilation error"
2136 );
2137 assert_eq!(result.files_checked(), 1);
2138 Ok(())
2139 }
2140
2141 #[tokio::test]
2142 async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2143 let tmp = tempfile::tempdir()?;
2144
2145 std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2147
2148 let schema_path = tmp.path().join("schema.json");
2150 std::fs::write(
2151 &schema_path,
2152 r#"{
2153 "type": "object",
2154 "properties": {
2155 "name": { "$ref": "./defs.json" }
2156 },
2157 "required": ["name"]
2158 }"#,
2159 )?;
2160
2161 let schema_uri = schema_path.to_string_lossy();
2163 std::fs::write(
2164 tmp.path().join("data.json"),
2165 format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2166 )?;
2167
2168 std::fs::write(
2170 tmp.path().join("bad.json"),
2171 format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2172 )?;
2173
2174 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2175 let args = ValidateArgs {
2176 globs: vec![pattern],
2177 exclude: vec![],
2178 cache_dir: None,
2179 force_schema_fetch: true,
2180 force_validation: true,
2181 no_catalog: true,
2182 config_dir: None,
2183 schema_cache_ttl: None,
2184 };
2185 let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2186
2187 assert!(result.has_errors());
2189 assert_eq!(result.errors.len(), 1);
2191 Ok(())
2192 }
2193}