1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result};
7use glob::glob;
8use serde_json::Value;
9
10use crate::catalog;
11use lintel_schema_cache::{CacheStatus, SchemaCache};
12use lintel_validation_cache::{ValidationCacheStatus, ValidationError};
13use schema_catalog::{CompiledCatalog, FileFormat};
14
15use crate::diagnostics::{DEFAULT_LABEL, find_instance_path_span, format_label};
16use crate::discover;
17use crate::parsers::{self, Parser};
18use crate::registry;
19
/// Maximum number of files read concurrently during the parallel read
/// phase; bounds open file descriptors (see `parse_and_group_files`).
const FD_CONCURRENCY_LIMIT: usize = 128;
24
/// Schema retriever that serves `file://` URIs from the local filesystem
/// and delegates every other URI to the shared HTTP schema cache.
struct LocalRetriever {
    /// Fallback retriever for remote (non-`file://`) schema URIs.
    http: SchemaCache,
}
30
31#[async_trait::async_trait]
32impl jsonschema::AsyncRetrieve for LocalRetriever {
33 async fn retrieve(
34 &self,
35 uri: &jsonschema::Uri<String>,
36 ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
37 let s = uri.as_str();
38 if let Some(raw) = s.strip_prefix("file://") {
39 let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
40 let content = tokio::fs::read_to_string(path.as_ref()).await?;
41 Ok(serde_json::from_str(&content)?)
42 } else {
43 self.http.retrieve(uri).await
44 }
45 }
46}
47
/// Options controlling a validation run.
pub struct ValidateArgs {
    /// Glob patterns or directory paths to validate; empty means
    /// "discover files starting from the current directory".
    pub globs: Vec<String>,

    /// Glob patterns for files to exclude from validation.
    pub exclude: Vec<String>,

    /// Override for the schema cache directory (created if missing).
    pub cache_dir: Option<String>,

    /// Fetch schemas even when a cached copy exists.
    pub force_schema_fetch: bool,

    /// Bypass the validation-result cache and re-validate every file.
    pub force_validation: bool,

    /// Skip fetching schema catalogs entirely.
    pub no_catalog: bool,

    /// Directory to start the config-file search from; defaults to the
    /// current working directory when `None`.
    pub config_dir: Option<PathBuf>,

    /// Time-to-live override for cached schemas.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
73
74pub use crate::diagnostics::LintError;
77
/// Record of one file (or JSONL line) that was checked against a schema.
pub struct CheckedFile {
    /// Display path; JSONL entries use the `path:line` form.
    pub path: String,
    /// Schema URI the file was validated against.
    pub schema: String,
    /// Schema-cache outcome; `None` for local (non-HTTP) schemas.
    pub cache_status: Option<CacheStatus>,
    /// Validation-cache outcome; `None` when validation never ran
    /// (e.g. the schema could not be fetched or compiled).
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
87
/// Outcome of a validation run: collected errors plus every checked file.
pub struct ValidateResult {
    /// All lint errors, sorted by path and then by byte offset.
    pub errors: Vec<LintError>,
    /// One entry per file/schema pair that was checked.
    pub checked: Vec<CheckedFile>,
}
93
impl ValidateResult {
    /// `true` when at least one lint error was recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    /// Number of file/schema checks performed during the run.
    pub fn files_checked(&self) -> usize {
        self.checked.len()
    }
}
103
/// A successfully parsed file (or single JSONL line) awaiting validation.
struct ParsedFile {
    /// Display path; JSONL lines use the `path:line` form.
    path: String,
    /// Raw text, used for span lookups and as the validation-cache key.
    content: String,
    /// Parsed JSON value to be validated.
    instance: Value,
    /// Schema URI as originally referenced, before rewrites and local-path
    /// resolution; used for per-file format-validation configuration.
    original_schema_uri: String,
}
116
117#[tracing::instrument(skip_all)]
125pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
126 let start_dir = match search_dir {
127 Some(d) => d.to_path_buf(),
128 None => match std::env::current_dir() {
129 Ok(d) => d,
130 Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
131 },
132 };
133
134 let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
135 return (lintel_config::Config::default(), start_dir, None);
136 };
137
138 let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
139 let cfg = lintel_config::find_and_load(&start_dir)
140 .ok()
141 .flatten()
142 .unwrap_or_default();
143 (cfg, dir, Some(config_path))
144}
145
146#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
156pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
157 if globs.is_empty() {
158 return discover::discover_files(".", exclude);
159 }
160
161 let mut result = Vec::new();
162 for pattern in globs {
163 let path = Path::new(pattern);
164 if path.is_dir() {
165 result.extend(discover::discover_files(pattern, exclude)?);
166 } else {
167 for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
168 let path = entry?;
169 if path.is_file() && !is_excluded(&path, exclude) {
170 result.push(path);
171 }
172 }
173 }
174 }
175 Ok(result)
176}
177
178fn is_excluded(path: &Path, excludes: &[String]) -> bool {
179 let path_str = match path.to_str() {
180 Some(s) => s.strip_prefix("./").unwrap_or(s),
181 None => return false,
182 };
183 excludes
184 .iter()
185 .any(|pattern| glob_match::glob_match(pattern, path_str))
186}
187
188pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
197 use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
198 const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
199
200 for fmt in FORMATS {
201 let parser = parsers::parser_for(fmt);
202 if let Ok(val) = parser.parse(content, file_name) {
203 return Some((fmt, val));
204 }
205 }
206 None
207}
208
/// Outcome of processing one file (or one JSONL line).
enum FileResult {
    /// Parsed and mapped to a schema; queued for validation.
    Parsed {
        schema_uri: String,
        parsed: ParsedFile,
    },
    /// Parse failure or schema-consistency error.
    Error(LintError),
    /// No schema applies (or nothing to validate); file is ignored.
    Skip,
}
219
/// Resolve a schema reference to something fetchable: remote (http/https)
/// URIs pass through untouched, while local paths are joined onto
/// `base_dir` when one is known.
fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
    if is_remote {
        return schema_uri.to_string();
    }
    match base_dir {
        // Note: `Path::join` keeps an absolute `schema_uri` as-is.
        Some(dir) => dir.join(schema_uri).to_string_lossy().into_owned(),
        None => schema_uri.to_string(),
    }
}
235
/// Parse a single file and resolve which schema (if any) applies to it.
///
/// Resolution priority for the schema URI: inline `$schema` in the file,
/// then a config-file mapping, then the first catalog match. Returns one
/// `FileResult` per logical document (JSONL files delegate to
/// `process_jsonl_file` and may yield many).
#[allow(clippy::too_many_arguments)]
fn process_one_file(
    path: &Path,
    content: String,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let path_str = path.display().to_string();
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&path_str);

    let detected_format = parsers::detect_format(path);

    // JSONL is validated line-by-line and handled separately.
    if detected_format == Some(FileFormat::Jsonl) {
        return process_jsonl_file(
            path,
            &path_str,
            file_name,
            &content,
            config,
            config_dir,
            compiled_catalogs,
        );
    }

    // Unknown extension: only worth format-sniffing when a config or
    // catalog mapping says a schema would apply to this path.
    if detected_format.is_none() {
        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
            || compiled_catalogs
                .iter()
                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
        if !has_match {
            return vec![FileResult::Skip];
        }
    }

    // Parse with the detected format, or sniff across all formats.
    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
        let parser = parsers::parser_for(fmt);
        match parser.parse(&content, &path_str) {
            Ok(val) => (parser, val),
            Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
        }
    } else {
        match try_parse_all(&content, &path_str) {
            Some((fmt, val)) => (parsers::parser_for(fmt), val),
            None => return vec![FileResult::Skip],
        }
    };

    // A null document (e.g. an empty file) has nothing to validate.
    if instance.is_null() {
        return vec![FileResult::Skip];
    }

    // Schema resolution: inline $schema > config mapping > catalog match.
    let inline_uri = parser.extract_schema_uri(&content, &instance);
    let from_inline = inline_uri.is_some();
    let schema_uri = inline_uri
        .or_else(|| {
            config
                .find_schema_mapping(&path_str, file_name)
                .map(str::to_string)
        })
        .or_else(|| {
            compiled_catalogs
                .iter()
                .find_map(|cat| cat.find_schema(&path_str, file_name))
                .map(str::to_string)
        });

    let Some(schema_uri) = schema_uri else {
        return vec![FileResult::Skip];
    };

    // Keep the pre-rewrite URI: per-file format-validation settings are
    // matched against it later (see `run_with`).
    let original_schema_uri = schema_uri.clone();

    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

    // Inline references resolve relative to the file itself; mapped ones
    // resolve relative to the config directory.
    let schema_uri = resolve_local_schema_path(
        &schema_uri,
        if from_inline {
            path.parent()
        } else {
            Some(config_dir)
        },
    );

    vec![FileResult::Parsed {
        schema_uri,
        parsed: ParsedFile {
            path: path_str,
            content,
            instance,
            original_schema_uri,
        },
    }]
}
353
/// Process a JSONL file: each line is parsed and schema-resolved
/// independently, so one file may yield many `Parsed` entries.
///
/// Schema resolution per line follows the same priority as
/// `process_one_file`: inline `$schema`, then config mapping, then catalog.
#[allow(clippy::too_many_arguments)]
fn process_jsonl_file(
    path: &Path,
    path_str: &str,
    file_name: &str,
    content: &str,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
        Ok(lines) => lines,
        Err(parse_err) => return vec![FileResult::Error(parse_err.into())],
    };

    if lines.is_empty() {
        return vec![FileResult::Skip];
    }

    let mut results = Vec::with_capacity(lines.len());

    // Lines declaring a $schema that disagrees with the rest of the file
    // are reported up front as mismatches.
    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
        for m in mismatches {
            results.push(FileResult::Error(LintError::SchemaMismatch {
                path: path_str.to_string(),
                line_number: m.line_number,
                message: format!("expected consistent $schema but found {}", m.schema_uri),
            }));
        }
    }

    for line in lines {
        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
        let from_inline = inline_uri.is_some();
        let schema_uri = inline_uri
            .or_else(|| {
                config
                    .find_schema_mapping(path_str, file_name)
                    .map(str::to_string)
            })
            .or_else(|| {
                compiled_catalogs
                    .iter()
                    .find_map(|cat| cat.find_schema(path_str, file_name))
                    .map(str::to_string)
            });

        // Lines with no applicable schema are silently skipped.
        let Some(schema_uri) = schema_uri else {
            continue;
        };

        // Keep the pre-rewrite URI for format-validation config matching.
        let original_schema_uri = schema_uri.clone();

        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

        // Inline references resolve relative to the file; mapped ones
        // relative to the config directory.
        let schema_uri = resolve_local_schema_path(
            &schema_uri,
            if from_inline {
                path.parent()
            } else {
                Some(config_dir)
            },
        );

        // Diagnostics address each line as `path:line`.
        let line_path = format!("{path_str}:{}", line.line_number);

        results.push(FileResult::Parsed {
            schema_uri,
            parsed: ParsedFile {
                path: line_path,
                content: line.raw,
                instance: line.value,
                original_schema_uri,
            },
        });
    }

    if results.is_empty() {
        vec![FileResult::Skip]
    } else {
        results
    }
}
449
/// Read all candidate files concurrently, parse them, and group the parsed
/// documents by resolved schema URI.
///
/// I/O and parse failures are appended to `errors`; files with no
/// applicable schema are silently skipped.
#[tracing::instrument(skip_all, fields(file_count = files.len()))]
#[allow(clippy::too_many_arguments)]
async fn parse_and_group_files(
    files: &[PathBuf],
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
    errors: &mut Vec<LintError>,
) -> BTreeMap<String, Vec<ParsedFile>> {
    // Bound concurrent reads so we stay under OS file-descriptor limits.
    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
    let mut read_set = tokio::task::JoinSet::new();
    for path in files {
        let path = path.clone();
        let sem = semaphore.clone();
        read_set.spawn(async move {
            // Hold the permit for the duration of the read.
            let _permit = sem.acquire().await.expect("semaphore closed");
            let result = tokio::fs::read_to_string(&path).await;
            (path, result)
        });
    }

    let mut file_contents = Vec::with_capacity(files.len());
    while let Some(result) = read_set.join_next().await {
        match result {
            Ok(item) => file_contents.push(item),
            // A panicked read task loses that file but doesn't abort the run.
            Err(e) => tracing::warn!("file read task panicked: {e}"),
        }
    }

    // BTreeMap keeps the schema groups in deterministic (sorted) order.
    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
    for (path, content_result) in file_contents {
        let content = match content_result {
            Ok(c) => c,
            Err(e) => {
                errors.push(LintError::Io {
                    path: path.display().to_string(),
                    message: format!("failed to read: {e}"),
                });
                continue;
            }
        };
        let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
        for result in results {
            match result {
                FileResult::Parsed { schema_uri, parsed } => {
                    schema_groups.entry(schema_uri).or_default().push(parsed);
                }
                FileResult::Error(e) => errors.push(e),
                FileResult::Skip => {}
            }
        }
    }

    schema_groups
}
510
/// Resolve the schema document for one group of files.
///
/// Remote (http/https) schemas must already be present in `prefetched`;
/// local schemas are read from disk and memoized in `local_cache`. On any
/// failure a `SchemaFetch` error is recorded for every file in the group
/// and `None` is returned. The `CacheStatus` is `None` for local schemas.
#[allow(clippy::too_many_arguments)]
async fn fetch_schema_from_prefetched(
    schema_uri: &str,
    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
    local_cache: &mut HashMap<String, Value>,
    group: &[ParsedFile],
    errors: &mut Vec<LintError>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Option<(Value, Option<CacheStatus>)> {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");

    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
        match prefetched.get(schema_uri) {
            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
            // Shouldn't happen: the caller prefetches every remote URI.
            None => Err(format!("schema not prefetched: {schema_uri}")),
        }
    } else if let Some(cached) = local_cache.get(schema_uri) {
        Ok((cached.clone(), None))
    } else {
        tokio::fs::read_to_string(schema_uri)
            .await
            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
            .and_then(|content| {
                serde_json::from_str::<Value>(&content)
                    .map(|v| {
                        // Memoize so repeated groups don't re-read the file.
                        local_cache.insert(schema_uri.to_string(), v.clone());
                        (v, None)
                    })
                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
            })
    };

    match result {
        Ok(value) => Some(value),
        Err(message) => {
            // Record the same fetch failure once per affected file so every
            // file still shows up in the checked list.
            report_group_error(
                |path| LintError::SchemaFetch {
                    path: path.to_string(),
                    message: message.clone(),
                },
                schema_uri,
                None,
                group,
                errors,
                checked,
                on_check,
            );
            None
        }
    }
}
572
573#[allow(clippy::too_many_arguments)]
575fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
576 make_error: impl Fn(&str) -> LintError,
577 schema_uri: &str,
578 cache_status: Option<CacheStatus>,
579 group: &[P],
580 errors: &mut Vec<LintError>,
581 checked: &mut Vec<CheckedFile>,
582 on_check: &mut impl FnMut(&CheckedFile),
583) {
584 for item in group {
585 let pf = item.borrow();
586 let cf = CheckedFile {
587 path: pf.path.clone(),
588 schema: schema_uri.to_string(),
589 cache_status,
590 validation_cache_status: None,
591 };
592 on_check(&cf);
593 checked.push(cf);
594 errors.push(make_error(&pf.path));
595 }
596}
597
598#[allow(clippy::too_many_arguments)]
600fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
601 schema_uri: &str,
602 cache_status: Option<CacheStatus>,
603 validation_cache_status: Option<ValidationCacheStatus>,
604 group: &[P],
605 checked: &mut Vec<CheckedFile>,
606 on_check: &mut impl FnMut(&CheckedFile),
607) {
608 for item in group {
609 let pf = item.borrow();
610 let cf = CheckedFile {
611 path: pf.path.clone(),
612 schema: schema_uri.to_string(),
613 cache_status,
614 validation_cache_status,
615 };
616 on_check(&cf);
617 checked.push(cf);
618 }
619}
620
/// Trim noisy "anyOf/oneOf"-style validator messages.
///
/// Those messages are prefixed with the full offending value, which can be
/// arbitrarily large. Everything up to and including the leading `" is "`
/// is dropped, so the result starts at "not valid under any of the ...".
/// Messages without the marker pass through unchanged.
fn clean_error_message(msg: String) -> String {
    const MARKER: &str = " is not valid under any of the schemas listed in the '";
    match msg.find(MARKER) {
        // `pos + 4` skips the 4-byte `" is "` (ASCII, so the slice is
        // always on a char boundary).
        Some(pos) => msg[pos + 4..].to_owned(),
        None => msg,
    }
}
637
/// Convert validator errors for one parsed file into rich
/// `LintError::Validation` diagnostics with miette source spans.
fn push_validation_errors(
    pf: &ParsedFile,
    schema_url: &str,
    validation_errors: &[ValidationError],
    errors: &mut Vec<LintError>,
) {
    for ve in validation_errors {
        // Map the instance path back to a byte span in the raw text.
        let span = find_instance_path_span(&pf.content, &ve.instance_path);
        // Root-level errors (empty pointer) get a generic label instead.
        let instance_path = if ve.instance_path.is_empty() {
            DEFAULT_LABEL.to_string()
        } else {
            ve.instance_path.clone()
        };
        let label = format_label(&instance_path, &ve.schema_path);
        let source_span: miette::SourceSpan = span.into();
        errors.push(LintError::Validation {
            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
            span: source_span,
            // Same span used for both highlights.
            schema_span: source_span,
            path: pf.path.clone(),
            instance_path,
            label,
            message: ve.message.clone(),
            schema_url: schema_url.to_string(),
            schema_path: ve.schema_path.clone(),
        });
    }
}
667
/// Run the compiled validator over every file in `group` (all of which
/// missed the validation cache), store each result in the cache, and
/// record the resulting errors and checked entries.
#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
#[allow(clippy::too_many_arguments)]
async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
    validator: &jsonschema::Validator,
    schema_uri: &str,
    schema_hash: &str,
    validate_formats: bool,
    cache_status: Option<CacheStatus>,
    group: &[P],
    vcache: &lintel_validation_cache::ValidationCache,
    errors: &mut Vec<LintError>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) {
    for item in group {
        let pf = item.borrow();
        // Collect every validation error, not just the first.
        let file_errors: Vec<ValidationError> = validator
            .iter_errors(&pf.instance)
            .map(|error| ValidationError {
                instance_path: error.instance_path().to_string(),
                message: clean_error_message(error.to_string()),
                schema_path: error.schema_path().to_string(),
            })
            .collect();

        // Store even empty (clean) results so unchanged files hit the
        // cache on the next run.
        vcache
            .store(
                &lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash,
                    validate_formats,
                },
                &file_errors,
            )
            .await;
        push_validation_errors(pf, schema_uri, &file_errors, errors);

        let cf = CheckedFile {
            path: pf.path.clone(),
            schema: schema_uri.to_string(),
            cache_status,
            // Only cache misses reach this function; hits are handled by
            // the caller before validation.
            validation_cache_status: Some(ValidationCacheStatus::Miss),
        };
        on_check(&cf);
        checked.push(cf);
    }
}
717
/// Fetch and compile all schema catalogs concurrently.
///
/// Priority order of the returned list: user-configured registries (in
/// their listed order), then the default lintel registry (unless
/// disabled), then the SchemaStore catalog. Tasks finish in arbitrary
/// order, so results carry an index and are sorted back into priority
/// order. Individual fetch failures print a warning and are skipped.
/// With `no_catalog` set, returns an empty list without any network work.
pub async fn fetch_compiled_catalogs(
    retriever: &SchemaCache,
    config: &lintel_config::Config,
    no_catalog: bool,
) -> Vec<CompiledCatalog> {
    let mut compiled_catalogs = Vec::new();

    if !no_catalog {
        let catalog_span = tracing::info_span!("fetch_catalogs").entered();

        // (priority index, human-readable label, fetch/compile outcome)
        #[allow(clippy::items_after_statements)]
        type CatalogResult = (
            usize,
            String,
            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
        );
        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();

        // Configured registries keep their listed order via index `i`.
        for (i, registry_url) in config.registries.iter().enumerate() {
            let r = retriever.clone();
            let url = registry_url.clone();
            let label = format!("registry {url}");
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, &url)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (i, label, result)
            });
        }

        // Default registry sorts after all configured registries.
        let lintel_order = config.registries.len();
        if !config.no_default_catalog {
            let r = retriever.clone();
            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (lintel_order, label, result)
            });
        }

        // SchemaStore is always fetched and sorts last.
        let schemastore_order = config.registries.len() + 1;
        let r = retriever.clone();
        catalog_tasks.spawn(async move {
            let result = catalog::fetch_catalog(&r)
                .await
                .map(|cat| CompiledCatalog::compile(&cat));
            (schemastore_order, "SchemaStore catalog".to_string(), result)
        });

        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
        while let Some(result) = catalog_tasks.join_next().await {
            match result {
                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
            }
        }
        // Restore the priority order lost to concurrent completion.
        results.sort_by_key(|(order, _)| *order);
        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));

        drop(catalog_span);
    }

    compiled_catalogs
}
798
/// Run validation with a default schema cache and no per-file progress
/// callback. See [`run_with`] for the full entry point.
pub async fn run(args: &ValidateArgs) -> Result<ValidateResult> {
    run_with(args, None, |_| {}).await
}
805
/// Full validation entry point.
///
/// Pipeline: build (or reuse) the schema cache → load config → collect
/// files → fetch catalogs → read/parse/group files by schema → prefetch
/// all remote schemas concurrently → per schema group: check the
/// validation cache, compile the schema, validate the misses. `on_check`
/// is invoked once per file/schema pair as it is processed.
///
/// # Errors
/// Returns an error only for glob/discovery failures; per-file problems
/// are collected into the returned `ValidateResult` instead.
#[tracing::instrument(skip_all, name = "validate")]
#[allow(clippy::too_many_lines)]
pub async fn run_with(
    args: &ValidateArgs,
    cache: Option<SchemaCache>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<ValidateResult> {
    // Use the caller-supplied cache (tests) or build one from the args.
    let retriever = if let Some(c) = cache {
        c
    } else {
        let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
        if let Some(dir) = &args.cache_dir {
            let path = PathBuf::from(dir);
            // Best-effort: a failed create falls through to the builder.
            let _ = fs::create_dir_all(&path);
            builder = builder.cache_dir(path);
        }
        if let Some(ttl) = args.schema_cache_ttl {
            builder = builder.ttl(ttl);
        }
        builder.build()
    };

    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    let files = collect_files(&args.globs, &args.exclude)?;
    tracing::info!(file_count = files.len(), "collected files");

    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;

    let mut errors: Vec<LintError> = Vec::new();
    let mut checked: Vec<CheckedFile> = Vec::new();

    // Phase 1: read + parse everything, grouped by resolved schema URI.
    let schema_groups = parse_and_group_files(
        &files,
        &config,
        &config_dir,
        &compiled_catalogs,
        &mut errors,
    )
    .await;
    tracing::info!(
        schema_count = schema_groups.len(),
        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
        "grouped files by schema"
    );

    let vcache = lintel_validation_cache::ValidationCache::new(
        lintel_validation_cache::ensure_cache_dir(),
        args.force_validation,
    );

    // Prefetch every remote schema concurrently before the serial
    // per-group loop below.
    let remote_uris: Vec<&String> = schema_groups
        .keys()
        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
        .collect();

    let prefetched = {
        let _prefetch_span =
            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();

        let mut schema_tasks = tokio::task::JoinSet::new();
        for uri in remote_uris {
            let r = retriever.clone();
            let u = uri.clone();
            schema_tasks.spawn(async move {
                let result = r.fetch(&u).await;
                (u, result)
            });
        }

        // Errors are stringified here so the map is cheap to clone/share.
        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
        while let Some(result) = schema_tasks.join_next().await {
            match result {
                Ok((uri, fetch_result)) => {
                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
                }
                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
            }
        }

        prefetched
    };

    // Local schemas are read lazily and memoized here.
    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
    // Per-phase wall-clock accumulators, reported at the end of the run.
    let mut fetch_time = core::time::Duration::ZERO;
    let mut hash_time = core::time::Duration::ZERO;
    let mut vcache_time = core::time::Duration::ZERO;
    let mut compile_time = core::time::Duration::ZERO;
    let mut validate_time = core::time::Duration::ZERO;

    // Phase 2: per schema group — fetch, hash, cache-check, compile,
    // validate.
    for (schema_uri, group) in &schema_groups {
        let _group_span = tracing::debug_span!(
            "schema_group",
            schema = schema_uri.as_str(),
            files = group.len(),
        )
        .entered();

        // Format assertions are validated only when the config enables
        // them for every file in the group (checked against both the
        // original and the rewritten schema URI).
        let validate_formats = group.iter().all(|pf| {
            config
                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
        });

        let t = std::time::Instant::now();
        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
            schema_uri,
            &prefetched,
            &mut local_schema_cache,
            group,
            &mut errors,
            &mut checked,
            &mut on_check,
        )
        .await
        else {
            // Fetch failed; errors were already reported per file.
            fetch_time += t.elapsed();
            continue;
        };
        fetch_time += t.elapsed();

        let t = std::time::Instant::now();
        // Part of the validation-cache key: results are invalidated when
        // the schema content changes.
        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
        hash_time += t.elapsed();

        let mut cache_misses: Vec<&ParsedFile> = Vec::new();

        let t = std::time::Instant::now();
        for pf in group {
            let (cached, vcache_status) = vcache
                .lookup(&lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash: &schema_hash,
                    validate_formats,
                })
                .await;

            if let Some(cached_errors) = cached {
                // Cache hit: replay the stored errors without validating.
                push_validation_errors(pf, schema_uri, &cached_errors, &mut errors);
                let cf = CheckedFile {
                    path: pf.path.clone(),
                    schema: schema_uri.clone(),
                    cache_status,
                    validation_cache_status: Some(vcache_status),
                };
                on_check(&cf);
                checked.push(cf);
            } else {
                cache_misses.push(pf);
            }
        }
        vcache_time += t.elapsed();

        tracing::debug!(
            cache_hits = group.len() - cache_misses.len(),
            cache_misses = cache_misses.len(),
            "validation cache"
        );

        // Everything was cached: skip schema compilation entirely.
        if cache_misses.is_empty() {
            continue;
        }

        let t = std::time::Instant::now();
        let validator = {
            let is_remote_schema =
                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
            let local_retriever = LocalRetriever {
                http: retriever.clone(),
            };
            let opts = jsonschema::async_options()
                .with_retriever(local_retriever)
                .should_validate_formats(validate_formats);
            // Base URI makes relative $refs inside the schema resolvable:
            // remote schemas use their own URI, local ones a file:// URI.
            let base_uri = if is_remote_schema {
                Some(schema_uri.clone())
            } else {
                std::fs::canonicalize(schema_uri)
                    .ok()
                    .map(|p| format!("file://{}", p.display()))
            };
            let opts = if let Some(uri) = base_uri {
                opts.with_base_uri(uri)
            } else {
                opts
            };
            match opts.build(&schema_value).await {
                Ok(v) => v,
                Err(e) => {
                    compile_time += t.elapsed();
                    // With format validation off, "uri-reference" compile
                    // failures are tolerated: mark the files checked and
                    // move on rather than erroring.
                    if !validate_formats && e.to_string().contains("uri-reference") {
                        mark_group_checked(
                            schema_uri,
                            cache_status,
                            Some(ValidationCacheStatus::Miss),
                            &cache_misses,
                            &mut checked,
                            &mut on_check,
                        );
                        continue;
                    }
                    let msg = format!("failed to compile schema: {e}");
                    report_group_error(
                        |path| LintError::SchemaCompile {
                            path: path.to_string(),
                            message: msg.clone(),
                        },
                        schema_uri,
                        cache_status,
                        &cache_misses,
                        &mut errors,
                        &mut checked,
                        &mut on_check,
                    );
                    continue;
                }
            }
        };
        compile_time += t.elapsed();

        let t = std::time::Instant::now();
        validate_group(
            &validator,
            schema_uri,
            &schema_hash,
            validate_formats,
            cache_status,
            &cache_misses,
            &vcache,
            &mut errors,
            &mut checked,
            &mut on_check,
        )
        .await;
        validate_time += t.elapsed();
    }

    #[allow(clippy::cast_possible_truncation)]
    {
        tracing::info!(
            fetch_ms = fetch_time.as_millis() as u64,
            hash_ms = hash_time.as_millis() as u64,
            vcache_ms = vcache_time.as_millis() as u64,
            compile_ms = compile_time.as_millis() as u64,
            validate_ms = validate_time.as_millis() as u64,
            "phase2 breakdown"
        );
    }

    // Deterministic output order: by path, then by byte offset.
    errors.sort_by(|a, b| {
        a.path()
            .cmp(b.path())
            .then_with(|| a.offset().cmp(&b.offset()))
    });

    Ok(ValidateResult { errors, checked })
}
1085
1086#[cfg(test)]
1087mod tests {
1088 use super::*;
1089 use lintel_schema_cache::SchemaCache;
1090 use std::path::Path;
1091
1092 fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1093 let cache = SchemaCache::memory();
1094 for (uri, body) in entries {
1095 cache.insert(
1096 uri,
1097 serde_json::from_str(body).expect("test mock: invalid JSON"),
1098 );
1099 }
1100 cache
1101 }
1102
1103 fn testdata() -> PathBuf {
1104 Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1105 }
1106
1107 fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1109 dirs.iter()
1110 .flat_map(|dir| {
1111 let base = testdata().join(dir);
1112 vec![
1113 base.join("*.json").to_string_lossy().to_string(),
1114 base.join("*.yaml").to_string_lossy().to_string(),
1115 base.join("*.yml").to_string_lossy().to_string(),
1116 base.join("*.json5").to_string_lossy().to_string(),
1117 base.join("*.jsonc").to_string_lossy().to_string(),
1118 base.join("*.toml").to_string_lossy().to_string(),
1119 ]
1120 })
1121 .collect()
1122 }
1123
1124 fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1125 ValidateArgs {
1126 globs: scenario_globs(dirs),
1127 exclude: vec![],
1128 cache_dir: None,
1129 force_schema_fetch: true,
1130 force_validation: true,
1131 no_catalog: true,
1132 config_dir: None,
1133 schema_cache_ttl: None,
1134 }
1135 }
1136
1137 const SCHEMA: &str =
1138 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1139
1140 fn schema_mock() -> SchemaCache {
1141 mock(&[("https://example.com/schema.json", SCHEMA)])
1142 }
1143
1144 #[tokio::test]
1147 async fn no_matching_files() -> anyhow::Result<()> {
1148 let tmp = tempfile::tempdir()?;
1149 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1150 let c = ValidateArgs {
1151 globs: vec![pattern],
1152 exclude: vec![],
1153 cache_dir: None,
1154 force_schema_fetch: true,
1155 force_validation: true,
1156 no_catalog: true,
1157 config_dir: None,
1158 schema_cache_ttl: None,
1159 };
1160 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1161 assert!(!result.has_errors());
1162 Ok(())
1163 }
1164
1165 #[tokio::test]
1166 async fn dir_all_valid() -> anyhow::Result<()> {
1167 let c = args_for_dirs(&["positive_tests"]);
1168 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1169 assert!(!result.has_errors());
1170 Ok(())
1171 }
1172
1173 #[tokio::test]
1174 async fn dir_all_invalid() -> anyhow::Result<()> {
1175 let c = args_for_dirs(&["negative_tests"]);
1176 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1177 assert!(result.has_errors());
1178 Ok(())
1179 }
1180
1181 #[tokio::test]
1182 async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1183 let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1184 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1185 assert!(result.has_errors());
1186 Ok(())
1187 }
1188
1189 #[tokio::test]
1190 async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1191 let c = args_for_dirs(&["no_schema"]);
1192 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1193 assert!(!result.has_errors());
1194 Ok(())
1195 }
1196
1197 #[tokio::test]
1198 async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1199 let c = args_for_dirs(&["positive_tests", "no_schema"]);
1200 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1201 assert!(!result.has_errors());
1202 Ok(())
1203 }
1204
1205 #[tokio::test]
1208 async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1209 let dir = testdata().join("positive_tests");
1210 let c = ValidateArgs {
1211 globs: vec![dir.to_string_lossy().to_string()],
1212 exclude: vec![],
1213 cache_dir: None,
1214 force_schema_fetch: true,
1215 force_validation: true,
1216 no_catalog: true,
1217 config_dir: None,
1218 schema_cache_ttl: None,
1219 };
1220 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1221 assert!(!result.has_errors());
1222 assert!(result.files_checked() > 0);
1223 Ok(())
1224 }
1225
1226 #[tokio::test]
1227 async fn multiple_directory_args() -> anyhow::Result<()> {
1228 let pos_dir = testdata().join("positive_tests");
1229 let no_schema_dir = testdata().join("no_schema");
1230 let c = ValidateArgs {
1231 globs: vec![
1232 pos_dir.to_string_lossy().to_string(),
1233 no_schema_dir.to_string_lossy().to_string(),
1234 ],
1235 exclude: vec![],
1236 cache_dir: None,
1237 force_schema_fetch: true,
1238 force_validation: true,
1239 no_catalog: true,
1240 config_dir: None,
1241 schema_cache_ttl: None,
1242 };
1243 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1244 assert!(!result.has_errors());
1245 Ok(())
1246 }
1247
1248 #[tokio::test]
1249 async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1250 let dir = testdata().join("positive_tests");
1251 let glob_pattern = testdata()
1252 .join("no_schema")
1253 .join("*.json")
1254 .to_string_lossy()
1255 .to_string();
1256 let c = ValidateArgs {
1257 globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1258 exclude: vec![],
1259 cache_dir: None,
1260 force_schema_fetch: true,
1261 force_validation: true,
1262 no_catalog: true,
1263 config_dir: None,
1264 schema_cache_ttl: None,
1265 };
1266 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1267 assert!(!result.has_errors());
1268 Ok(())
1269 }
1270
1271 #[tokio::test]
1272 async fn malformed_json_parse_error() -> anyhow::Result<()> {
1273 let base = testdata().join("malformed");
1274 let c = ValidateArgs {
1275 globs: vec![base.join("*.json").to_string_lossy().to_string()],
1276 exclude: vec![],
1277 cache_dir: None,
1278 force_schema_fetch: true,
1279 force_validation: true,
1280 no_catalog: true,
1281 config_dir: None,
1282 schema_cache_ttl: None,
1283 };
1284 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1285 assert!(result.has_errors());
1286 Ok(())
1287 }
1288
1289 #[tokio::test]
1290 async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1291 let base = testdata().join("malformed");
1292 let c = ValidateArgs {
1293 globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1294 exclude: vec![],
1295 cache_dir: None,
1296 force_schema_fetch: true,
1297 force_validation: true,
1298 no_catalog: true,
1299 config_dir: None,
1300 schema_cache_ttl: None,
1301 };
1302 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1303 assert!(result.has_errors());
1304 Ok(())
1305 }
1306
1307 #[tokio::test]
1310 async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1311 let base = testdata().join("negative_tests");
1312 let c = ValidateArgs {
1313 globs: scenario_globs(&["positive_tests", "negative_tests"]),
1314 exclude: vec![
1315 base.join("missing_name.json").to_string_lossy().to_string(),
1316 base.join("missing_name.toml").to_string_lossy().to_string(),
1317 base.join("missing_name.yaml").to_string_lossy().to_string(),
1318 ],
1319 cache_dir: None,
1320 force_schema_fetch: true,
1321 force_validation: true,
1322 no_catalog: true,
1323 config_dir: None,
1324 schema_cache_ttl: None,
1325 };
1326 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1327 assert!(!result.has_errors());
1328 Ok(())
1329 }
1330
1331 #[tokio::test]
1334 async fn custom_cache_dir() -> anyhow::Result<()> {
1335 let c = ValidateArgs {
1336 globs: scenario_globs(&["positive_tests"]),
1337 exclude: vec![],
1338 cache_dir: None,
1339 force_schema_fetch: true,
1340 force_validation: true,
1341 no_catalog: true,
1342 config_dir: None,
1343 schema_cache_ttl: None,
1344 };
1345 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1346 assert!(!result.has_errors());
1347 Ok(())
1348 }
1349
1350 #[tokio::test]
1353 async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1354 let tmp = tempfile::tempdir()?;
1355 let schema_path = tmp.path().join("schema.json");
1356 fs::write(&schema_path, SCHEMA)?;
1357
1358 let f = tmp.path().join("valid.json");
1359 fs::write(
1360 &f,
1361 format!(
1362 r#"{{"$schema":"{}","name":"hello"}}"#,
1363 schema_path.to_string_lossy()
1364 ),
1365 )?;
1366
1367 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1368 let c = ValidateArgs {
1369 globs: vec![pattern],
1370 exclude: vec![],
1371 cache_dir: None,
1372 force_schema_fetch: true,
1373 force_validation: true,
1374 no_catalog: true,
1375 config_dir: None,
1376 schema_cache_ttl: None,
1377 };
1378 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1379 assert!(!result.has_errors());
1380 Ok(())
1381 }
1382
1383 #[tokio::test]
1384 async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1385 let tmp = tempfile::tempdir()?;
1386 let schema_path = tmp.path().join("schema.json");
1387 fs::write(&schema_path, SCHEMA)?;
1388
1389 let f = tmp.path().join("valid.yaml");
1390 fs::write(
1391 &f,
1392 format!(
1393 "# yaml-language-server: $schema={}\nname: hello\n",
1394 schema_path.to_string_lossy()
1395 ),
1396 )?;
1397
1398 let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1399 let c = ValidateArgs {
1400 globs: vec![pattern],
1401 exclude: vec![],
1402 cache_dir: None,
1403 force_schema_fetch: true,
1404 force_validation: true,
1405 no_catalog: true,
1406 config_dir: None,
1407 schema_cache_ttl: None,
1408 };
1409 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1410 assert!(!result.has_errors());
1411 Ok(())
1412 }
1413
1414 #[tokio::test]
1415 async fn missing_local_schema_errors() -> anyhow::Result<()> {
1416 let tmp = tempfile::tempdir()?;
1417 let f = tmp.path().join("ref.json");
1418 fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1419
1420 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1421 let c = ValidateArgs {
1422 globs: vec![pattern],
1423 exclude: vec![],
1424 cache_dir: None,
1425 force_schema_fetch: true,
1426 force_validation: true,
1427 no_catalog: true,
1428 config_dir: None,
1429 schema_cache_ttl: None,
1430 };
1431 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1432 assert!(result.has_errors());
1433 Ok(())
1434 }
1435
1436 #[tokio::test]
1439 async fn json5_valid_with_schema() -> anyhow::Result<()> {
1440 let tmp = tempfile::tempdir()?;
1441 let schema_path = tmp.path().join("schema.json");
1442 fs::write(&schema_path, SCHEMA)?;
1443
1444 let f = tmp.path().join("config.json5");
1445 fs::write(
1446 &f,
1447 format!(
1448 r#"{{
1449 // JSON5 comment
1450 "$schema": "{}",
1451 name: "hello",
1452}}"#,
1453 schema_path.to_string_lossy()
1454 ),
1455 )?;
1456
1457 let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1458 let c = ValidateArgs {
1459 globs: vec![pattern],
1460 exclude: vec![],
1461 cache_dir: None,
1462 force_schema_fetch: true,
1463 force_validation: true,
1464 no_catalog: true,
1465 config_dir: None,
1466 schema_cache_ttl: None,
1467 };
1468 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1469 assert!(!result.has_errors());
1470 Ok(())
1471 }
1472
1473 #[tokio::test]
1474 async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1475 let tmp = tempfile::tempdir()?;
1476 let schema_path = tmp.path().join("schema.json");
1477 fs::write(&schema_path, SCHEMA)?;
1478
1479 let f = tmp.path().join("config.jsonc");
1480 fs::write(
1481 &f,
1482 format!(
1483 r#"{{
1484 /* JSONC comment */
1485 "$schema": "{}",
1486 "name": "hello"
1487}}"#,
1488 schema_path.to_string_lossy()
1489 ),
1490 )?;
1491
1492 let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1493 let c = ValidateArgs {
1494 globs: vec![pattern],
1495 exclude: vec![],
1496 cache_dir: None,
1497 force_schema_fetch: true,
1498 force_validation: true,
1499 no_catalog: true,
1500 config_dir: None,
1501 schema_cache_ttl: None,
1502 };
1503 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1504 assert!(!result.has_errors());
1505 Ok(())
1506 }
1507
    /// Minimal stand-in for the GitHub Actions workflow schema used by the
    /// catalog tests: only `on` and `jobs` are required, which is enough to
    /// distinguish a valid workflow fixture from an invalid one.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
    "type": "object",
    "properties": {
        "name": { "type": "string" },
        "on": {},
        "jobs": { "type": "object" }
    },
    "required": ["on", "jobs"]
    }"#;
1519
1520 fn gh_catalog_json() -> String {
1521 r#"{"version":1,"schemas":[{
1522 "name": "GitHub Workflow",
1523 "description": "GitHub Actions workflow",
1524 "url": "https://www.schemastore.org/github-workflow.json",
1525 "fileMatch": [
1526 "**/.github/workflows/*.yml",
1527 "**/.github/workflows/*.yaml"
1528 ]
1529 }]}"#
1530 .to_string()
1531 }
1532
1533 #[tokio::test]
1534 async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1535 let tmp = tempfile::tempdir()?;
1536 let cache_tmp = tempfile::tempdir()?;
1537 let wf_dir = tmp.path().join(".github/workflows");
1538 fs::create_dir_all(&wf_dir)?;
1539 fs::write(
1540 wf_dir.join("ci.yml"),
1541 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1542 )?;
1543
1544 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1545 let client = mock(&[
1546 (
1547 "https://www.schemastore.org/api/json/catalog.json",
1548 &gh_catalog_json(),
1549 ),
1550 (
1551 "https://www.schemastore.org/github-workflow.json",
1552 GH_WORKFLOW_SCHEMA,
1553 ),
1554 ]);
1555 let c = ValidateArgs {
1556 globs: vec![pattern],
1557 exclude: vec![],
1558 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1559 force_schema_fetch: true,
1560 force_validation: true,
1561 no_catalog: false,
1562 config_dir: None,
1563 schema_cache_ttl: None,
1564 };
1565 let result = run_with(&c, Some(client), |_| {}).await?;
1566 assert!(!result.has_errors());
1567 Ok(())
1568 }
1569
1570 #[tokio::test]
1571 async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1572 let tmp = tempfile::tempdir()?;
1573 let cache_tmp = tempfile::tempdir()?;
1574 let wf_dir = tmp.path().join(".github/workflows");
1575 fs::create_dir_all(&wf_dir)?;
1576 fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1577
1578 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1579 let client = mock(&[
1580 (
1581 "https://www.schemastore.org/api/json/catalog.json",
1582 &gh_catalog_json(),
1583 ),
1584 (
1585 "https://www.schemastore.org/github-workflow.json",
1586 GH_WORKFLOW_SCHEMA,
1587 ),
1588 ]);
1589 let c = ValidateArgs {
1590 globs: vec![pattern],
1591 exclude: vec![],
1592 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1593 force_schema_fetch: true,
1594 force_validation: true,
1595 no_catalog: false,
1596 config_dir: None,
1597 schema_cache_ttl: None,
1598 };
1599 let result = run_with(&c, Some(client), |_| {}).await?;
1600 assert!(result.has_errors());
1601 Ok(())
1602 }
1603
1604 #[tokio::test]
1605 async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1606 let tmp = tempfile::tempdir()?;
1607 let cache_tmp = tempfile::tempdir()?;
1608 let wf_dir = tmp.path().join(".github/workflows");
1609 fs::create_dir_all(&wf_dir)?;
1610 fs::write(
1611 wf_dir.join("ci.yml"),
1612 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1613 )?;
1614
1615 let client = mock(&[
1616 (
1617 "https://www.schemastore.org/api/json/catalog.json",
1618 &gh_catalog_json(),
1619 ),
1620 (
1621 "https://www.schemastore.org/github-workflow.json",
1622 GH_WORKFLOW_SCHEMA,
1623 ),
1624 ]);
1625 let c = ValidateArgs {
1626 globs: vec![],
1627 exclude: vec![],
1628 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1629 force_schema_fetch: true,
1630 force_validation: true,
1631 no_catalog: false,
1632 config_dir: None,
1633 schema_cache_ttl: None,
1634 };
1635
1636 let orig_dir = std::env::current_dir()?;
1637 std::env::set_current_dir(tmp.path())?;
1638 let result = run_with(&c, Some(client), |_| {}).await?;
1639 std::env::set_current_dir(orig_dir)?;
1640
1641 assert!(!result.has_errors());
1642 Ok(())
1643 }
1644
1645 #[tokio::test]
1648 async fn toml_valid_with_schema() -> anyhow::Result<()> {
1649 let tmp = tempfile::tempdir()?;
1650 let schema_path = tmp.path().join("schema.json");
1651 fs::write(&schema_path, SCHEMA)?;
1652
1653 let f = tmp.path().join("config.toml");
1654 fs::write(
1655 &f,
1656 format!(
1657 "# :schema {}\nname = \"hello\"\n",
1658 schema_path.to_string_lossy()
1659 ),
1660 )?;
1661
1662 let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1663 let c = ValidateArgs {
1664 globs: vec![pattern],
1665 exclude: vec![],
1666 cache_dir: None,
1667 force_schema_fetch: true,
1668 force_validation: true,
1669 no_catalog: true,
1670 config_dir: None,
1671 schema_cache_ttl: None,
1672 };
1673 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1674 assert!(!result.has_errors());
1675 Ok(())
1676 }
1677
1678 #[tokio::test]
1681 async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1682 let tmp = tempfile::tempdir()?;
1683
1684 let schemas_dir = tmp.path().join("schemas");
1685 fs::create_dir_all(&schemas_dir)?;
1686 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1687
1688 fs::write(
1689 tmp.path().join("lintel.toml"),
1690 r#"
1691[rewrite]
1692"http://localhost:9000/" = "//schemas/"
1693"#,
1694 )?;
1695
1696 let f = tmp.path().join("config.json");
1697 fs::write(
1698 &f,
1699 r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1700 )?;
1701
1702 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1703 let c = ValidateArgs {
1704 globs: vec![pattern],
1705 exclude: vec![],
1706 cache_dir: None,
1707 force_schema_fetch: true,
1708 force_validation: true,
1709 no_catalog: true,
1710 config_dir: Some(tmp.path().to_path_buf()),
1711 schema_cache_ttl: None,
1712 };
1713
1714 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1715 assert!(!result.has_errors());
1716 assert_eq!(result.files_checked(), 1);
1717 Ok(())
1718 }
1719
1720 #[tokio::test]
1721 async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1722 let tmp = tempfile::tempdir()?;
1723
1724 let schemas_dir = tmp.path().join("schemas");
1725 fs::create_dir_all(&schemas_dir)?;
1726 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1727
1728 fs::write(tmp.path().join("lintel.toml"), "")?;
1729
1730 let sub = tmp.path().join("deeply/nested");
1731 fs::create_dir_all(&sub)?;
1732 let f = sub.join("config.json");
1733 fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1734
1735 let pattern = sub.join("*.json").to_string_lossy().to_string();
1736 let c = ValidateArgs {
1737 globs: vec![pattern],
1738 exclude: vec![],
1739 cache_dir: None,
1740 force_schema_fetch: true,
1741 force_validation: true,
1742 no_catalog: true,
1743 config_dir: Some(tmp.path().to_path_buf()),
1744 schema_cache_ttl: None,
1745 };
1746
1747 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1748 assert!(!result.has_errors());
1749 Ok(())
1750 }
1751
    /// Schema with a single `format: uri-reference` string property, used to
    /// check whether `format` violations are reported or suppressed.
    const FORMAT_SCHEMA: &str = r#"{
    "type": "object",
    "properties": {
        "link": { "type": "string", "format": "uri-reference" }
    }
    }"#;
1760
1761 #[tokio::test]
1762 async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1763 let tmp = tempfile::tempdir()?;
1764 let schema_path = tmp.path().join("schema.json");
1765 fs::write(&schema_path, FORMAT_SCHEMA)?;
1766
1767 let f = tmp.path().join("data.json");
1768 fs::write(
1769 &f,
1770 format!(
1771 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1772 schema_path.to_string_lossy()
1773 ),
1774 )?;
1775
1776 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1777 let c = ValidateArgs {
1778 globs: vec![pattern],
1779 exclude: vec![],
1780 cache_dir: None,
1781 force_schema_fetch: true,
1782 force_validation: true,
1783 no_catalog: true,
1784 config_dir: Some(tmp.path().to_path_buf()),
1785 schema_cache_ttl: None,
1786 };
1787 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1788 assert!(
1789 result.has_errors(),
1790 "expected format error without override"
1791 );
1792 Ok(())
1793 }
1794
1795 #[tokio::test]
1796 async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1797 let tmp = tempfile::tempdir()?;
1798 let schema_path = tmp.path().join("schema.json");
1799 fs::write(&schema_path, FORMAT_SCHEMA)?;
1800
1801 let f = tmp.path().join("data.json");
1802 fs::write(
1803 &f,
1804 format!(
1805 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1806 schema_path.to_string_lossy()
1807 ),
1808 )?;
1809
1810 fs::write(
1812 tmp.path().join("lintel.toml"),
1813 r#"
1814[[override]]
1815files = ["**/data.json"]
1816validate_formats = false
1817"#,
1818 )?;
1819
1820 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1821 let c = ValidateArgs {
1822 globs: vec![pattern],
1823 exclude: vec![],
1824 cache_dir: None,
1825 force_schema_fetch: true,
1826 force_validation: true,
1827 no_catalog: true,
1828 config_dir: Some(tmp.path().to_path_buf()),
1829 schema_cache_ttl: None,
1830 };
1831 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1832 assert!(
1833 !result.has_errors(),
1834 "expected no errors with validate_formats = false override"
1835 );
1836 Ok(())
1837 }
1838
1839 #[tokio::test]
1842 async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1843 let tmp = tempfile::tempdir()?;
1844 fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1845
1846 let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1847 let c = ValidateArgs {
1848 globs: vec![pattern],
1849 exclude: vec![],
1850 cache_dir: None,
1851 force_schema_fetch: true,
1852 force_validation: true,
1853 no_catalog: true,
1854 config_dir: Some(tmp.path().to_path_buf()),
1855 schema_cache_ttl: None,
1856 };
1857 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1858 assert!(!result.has_errors());
1859 assert_eq!(result.files_checked(), 0);
1860 Ok(())
1861 }
1862
1863 #[tokio::test]
1864 async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1865 let tmp = tempfile::tempdir()?;
1866 let cache_tmp = tempfile::tempdir()?;
1867 fs::write(
1869 tmp.path().join("myapp.cfg"),
1870 r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1871 )?;
1872
1873 let catalog_json = r#"{"version":1,"schemas":[{
1874 "name": "MyApp Config",
1875 "description": "MyApp configuration",
1876 "url": "https://example.com/myapp.schema.json",
1877 "fileMatch": ["*.cfg"]
1878 }]}"#;
1879 let schema =
1880 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1881
1882 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1883 let client = mock(&[
1884 (
1885 "https://www.schemastore.org/api/json/catalog.json",
1886 catalog_json,
1887 ),
1888 ("https://example.com/myapp.schema.json", schema),
1889 ]);
1890 let c = ValidateArgs {
1891 globs: vec![pattern],
1892 exclude: vec![],
1893 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1894 force_schema_fetch: true,
1895 force_validation: true,
1896 no_catalog: false,
1897 config_dir: Some(tmp.path().to_path_buf()),
1898 schema_cache_ttl: None,
1899 };
1900 let result = run_with(&c, Some(client), |_| {}).await?;
1901 assert!(!result.has_errors());
1902 assert_eq!(result.files_checked(), 1);
1903 Ok(())
1904 }
1905
1906 #[tokio::test]
1907 async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1908 let tmp = tempfile::tempdir()?;
1909 let cache_tmp = tempfile::tempdir()?;
1910 fs::write(
1912 tmp.path().join("myapp.cfg"),
1913 "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1914 )?;
1915
1916 let catalog_json = r#"{"version":1,"schemas":[{
1917 "name": "MyApp Config",
1918 "description": "MyApp configuration",
1919 "url": "https://example.com/myapp.schema.json",
1920 "fileMatch": ["*.cfg"]
1921 }]}"#;
1922
1923 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1924 let client = mock(&[(
1925 "https://www.schemastore.org/api/json/catalog.json",
1926 catalog_json,
1927 )]);
1928 let c = ValidateArgs {
1929 globs: vec![pattern],
1930 exclude: vec![],
1931 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1932 force_schema_fetch: true,
1933 force_validation: true,
1934 no_catalog: false,
1935 config_dir: Some(tmp.path().to_path_buf()),
1936 schema_cache_ttl: None,
1937 };
1938 let result = run_with(&c, Some(client), |_| {}).await?;
1939 assert!(!result.has_errors());
1940 assert_eq!(result.files_checked(), 0);
1941 Ok(())
1942 }
1943
1944 #[tokio::test]
1945 async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1946 let tmp = tempfile::tempdir()?;
1947 let cache_tmp = tempfile::tempdir()?;
1948 fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1950
1951 let catalog_json = r#"{"version":1,"schemas":[{
1952 "name": "MyApp Config",
1953 "description": "MyApp configuration",
1954 "url": "https://example.com/myapp.schema.json",
1955 "fileMatch": ["*.cfg"]
1956 }]}"#;
1957 let schema =
1958 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1959
1960 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1961 let client = mock(&[
1962 (
1963 "https://www.schemastore.org/api/json/catalog.json",
1964 catalog_json,
1965 ),
1966 ("https://example.com/myapp.schema.json", schema),
1967 ]);
1968 let c = ValidateArgs {
1969 globs: vec![pattern],
1970 exclude: vec![],
1971 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1972 force_schema_fetch: true,
1973 force_validation: true,
1974 no_catalog: false,
1975 config_dir: Some(tmp.path().to_path_buf()),
1976 schema_cache_ttl: None,
1977 };
1978 let result = run_with(&c, Some(client), |_| {}).await?;
1979 assert!(result.has_errors());
1980 assert_eq!(result.files_checked(), 1);
1981 Ok(())
1982 }
1983
1984 #[tokio::test]
1987 async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1988 let tmp = tempfile::tempdir()?;
1989 let schema_path = tmp.path().join("schema.json");
1990 fs::write(&schema_path, SCHEMA)?;
1991
1992 let f = tmp.path().join("valid.json");
1993 fs::write(
1994 &f,
1995 format!(
1996 r#"{{"$schema":"{}","name":"hello"}}"#,
1997 schema_path.to_string_lossy()
1998 ),
1999 )?;
2000
2001 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2002
2003 let c = ValidateArgs {
2005 globs: vec![pattern.clone()],
2006 exclude: vec![],
2007 cache_dir: None,
2008 force_schema_fetch: true,
2009 force_validation: false,
2010 no_catalog: true,
2011 config_dir: None,
2012 schema_cache_ttl: None,
2013 };
2014 let mut first_statuses = Vec::new();
2015 let result = run_with(&c, Some(mock(&[])), |cf| {
2016 first_statuses.push(cf.validation_cache_status);
2017 })
2018 .await?;
2019 assert!(!result.has_errors());
2020 assert!(result.files_checked() > 0);
2021
2022 assert!(
2024 first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2025 "expected at least one validation cache miss on first run"
2026 );
2027
2028 let mut second_statuses = Vec::new();
2030 let result = run_with(&c, Some(mock(&[])), |cf| {
2031 second_statuses.push(cf.validation_cache_status);
2032 })
2033 .await?;
2034 assert!(!result.has_errors());
2035
2036 assert!(
2038 second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2039 "expected at least one validation cache hit on second run"
2040 );
2041 Ok(())
2042 }
2043
2044 #[test]
2047 fn clean_strips_anyof_value() {
2048 let msg =
2049 r#"{"type":"bad"} is not valid under any of the schemas listed in the 'anyOf' keyword"#;
2050 assert_eq!(
2051 clean_error_message(msg.to_string()),
2052 "not valid under any of the schemas listed in the 'anyOf' keyword"
2053 );
2054 }
2055
2056 #[test]
2057 fn clean_strips_oneof_value() {
2058 let msg = r#"{"runs-on":"ubuntu-latest","steps":[]} is not valid under any of the schemas listed in the 'oneOf' keyword"#;
2059 assert_eq!(
2060 clean_error_message(msg.to_string()),
2061 "not valid under any of the schemas listed in the 'oneOf' keyword"
2062 );
2063 }
2064
2065 #[test]
2066 fn clean_strips_long_value() {
2067 let long_value = "x".repeat(5000);
2068 let suffix = " is not valid under any of the schemas listed in the 'anyOf' keyword";
2069 let msg = format!("{long_value}{suffix}");
2070 assert_eq!(
2071 clean_error_message(msg),
2072 "not valid under any of the schemas listed in the 'anyOf' keyword"
2073 );
2074 }
2075
2076 #[test]
2077 fn clean_preserves_type_error() {
2078 let msg = r#"12345 is not of types "null", "string""#;
2079 assert_eq!(clean_error_message(msg.to_string()), msg);
2080 }
2081
2082 #[test]
2083 fn clean_preserves_required_property() {
2084 let msg = "\"name\" is a required property";
2085 assert_eq!(clean_error_message(msg.to_string()), msg);
2086 }
2087
2088 #[tokio::test]
2089 async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2090 let tmp = tempfile::tempdir()?;
2091
2092 std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2094
2095 let schema_path = tmp.path().join("schema.json");
2097 std::fs::write(
2098 &schema_path,
2099 r#"{
2100 "type": "object",
2101 "properties": {
2102 "name": { "$ref": "./defs.json" }
2103 },
2104 "required": ["name"]
2105 }"#,
2106 )?;
2107
2108 let schema_uri = schema_path.to_string_lossy();
2110 std::fs::write(
2111 tmp.path().join("data.json"),
2112 format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2113 )?;
2114
2115 std::fs::write(
2117 tmp.path().join("bad.json"),
2118 format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2119 )?;
2120
2121 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2122 let args = ValidateArgs {
2123 globs: vec![pattern],
2124 exclude: vec![],
2125 cache_dir: None,
2126 force_schema_fetch: true,
2127 force_validation: true,
2128 no_catalog: true,
2129 config_dir: None,
2130 schema_cache_ttl: None,
2131 };
2132 let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2133
2134 assert!(result.has_errors());
2136 assert_eq!(result.errors.len(), 1);
2138 Ok(())
2139 }
2140}