1use alloc::collections::BTreeMap;
2use std::collections::HashMap;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use anyhow::Result;
7use serde_json::Value;
8
9use lintel_diagnostics::reporter::{CheckResult, CheckedFile};
10use lintel_diagnostics::{
11 DEFAULT_LABEL, LintelDiagnostic, ValidationDiagnostic, find_instance_path_span, format_label,
12};
13use lintel_schema_cache::{CacheStatus, SchemaCache};
14use lintel_validation_cache::{ValidationCacheStatus, ValidationError, ValidationErrorKind};
15use schema_catalog::{CompiledCatalog, FileFormat};
16
17use crate::catalog;
18use crate::parsers::{self, Parser};
19use crate::registry;
20use crate::suggest;
21
/// Maximum number of files opened concurrently while reading inputs,
/// to keep well under typical process file-descriptor limits.
const FD_CONCURRENCY_LIMIT: usize = 128;
26
/// Schema `$ref` retriever that serves `file://` URIs from the local
/// filesystem and delegates every other URI to the HTTP schema cache.
struct LocalRetriever {
    /// HTTP-backed schema cache used for non-`file://` URIs.
    http: SchemaCache,
}
32
33#[async_trait::async_trait]
34impl jsonschema::AsyncRetrieve for LocalRetriever {
35 async fn retrieve(
36 &self,
37 uri: &jsonschema::Uri<String>,
38 ) -> Result<Value, Box<dyn core::error::Error + Send + Sync>> {
39 let s = uri.as_str();
40 if let Some(raw) = s.strip_prefix("file://") {
41 let path = percent_encoding::percent_decode_str(raw).decode_utf8()?;
42 let content = tokio::fs::read_to_string(path.as_ref()).await?;
43 Ok(serde_json::from_str(&content)?)
44 } else {
45 self.http.retrieve(uri).await
46 }
47 }
48}
49
/// Options controlling a validation run.
pub struct ValidateArgs {
    /// Glob patterns selecting which files to validate.
    pub globs: Vec<String>,

    /// Glob patterns for files to exclude.
    pub exclude: Vec<String>,

    /// Directory for the on-disk schema cache; the cache's default is
    /// used when `None`.
    pub cache_dir: Option<String>,

    /// Re-fetch remote schemas even when a cached copy exists.
    pub force_schema_fetch: bool,

    /// Re-validate files even when a cached validation result exists.
    pub force_validation: bool,

    /// Skip fetching schema catalogs (registries / SchemaStore).
    pub no_catalog: bool,

    /// Directory to start the config-file search from; the current
    /// working directory when `None`.
    pub config_dir: Option<PathBuf>,

    /// Time-to-live for cached remote schemas; cache default when `None`.
    pub schema_cache_ttl: Option<core::time::Duration>,
}
75
/// A parsed document waiting to be validated, grouped by schema URI.
struct ParsedFile {
    /// Display path; JSONL lines use the `path:line` form.
    path: String,
    /// Raw file text; used for span lookup and as validation-cache key.
    content: String,
    /// Parsed document value.
    instance: Value,
    /// Schema URI as originally written, before rewrites/resolution;
    /// used for per-file format-validation settings.
    original_schema_uri: String,
}
88
89#[tracing::instrument(skip_all)]
97pub fn load_config(search_dir: Option<&Path>) -> (lintel_config::Config, PathBuf, Option<PathBuf>) {
98 let start_dir = match search_dir {
99 Some(d) => d.to_path_buf(),
100 None => match std::env::current_dir() {
101 Ok(d) => d,
102 Err(_) => return (lintel_config::Config::default(), PathBuf::from("."), None),
103 },
104 };
105
106 let Some(config_path) = lintel_config::find_config_path(&start_dir) else {
107 return (lintel_config::Config::default(), start_dir, None);
108 };
109
110 let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
111 let cfg = lintel_config::find_and_load(&start_dir)
112 .ok()
113 .flatten()
114 .unwrap_or_default();
115 (cfg, dir, Some(config_path))
116}
117
/// Expand `globs` into concrete file paths, honoring `exclude`
/// patterns. The predicate tells discovery which paths have a file
/// format this crate's parsers recognize (exact use is up to
/// `lintel_config::discover::collect_files`).
#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
pub fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
    lintel_config::discover::collect_files(globs, exclude, |p| parsers::detect_format(p).is_some())
}
131
132pub fn try_parse_all(content: &str, file_name: &str) -> Option<(FileFormat, Value)> {
141 use FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
142 const FORMATS: [FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
143
144 for fmt in FORMATS {
145 let parser = parsers::parser_for(fmt);
146 if let Ok(val) = parser.parse(content, file_name) {
147 return Some((fmt, val));
148 }
149 }
150 None
151}
152
/// Outcome of processing one file (or one JSONL line).
enum FileResult {
    /// File parsed successfully and matched a schema.
    Parsed {
        /// Fully resolved schema URI, used as the grouping key.
        schema_uri: String,
        parsed: ParsedFile,
    },
    /// Parsing or schema-consistency checking produced a diagnostic.
    Error(LintelDiagnostic),
    /// File is not subject to validation (unknown format, null
    /// document, or no schema mapping found).
    Skip,
}
163
164fn resolve_local_schema_path(schema_uri: &str, base_dir: Option<&Path>) -> String {
170 if schema_uri.starts_with("http://") || schema_uri.starts_with("https://") {
171 return schema_uri.to_string();
172 }
173 if let Some(dir) = base_dir {
174 normalize_path(&dir.join(schema_uri))
175 .to_string_lossy()
176 .to_string()
177 } else {
178 schema_uri.to_string()
179 }
180}
181
/// Lexically normalize `path`: drop `.` components and resolve `..`
/// against the preceding component.
///
/// Purely textual — symlinks are not consulted. A `..` with nothing
/// left to pop (e.g. a leading `../`) is silently discarded.
fn normalize_path(path: &Path) -> PathBuf {
    path.components()
        .fold(PathBuf::new(), |mut acc, component| {
            match component {
                std::path::Component::CurDir => {}
                std::path::Component::ParentDir => {
                    // No-op at the root or on an empty accumulator.
                    acc.pop();
                }
                other => acc.push(other),
            }
            acc
        })
}
197
/// Parse one file and resolve the schema URI that should validate it.
///
/// Normally yields a single `FileResult`; JSONL files expand to one
/// result per line via `process_jsonl_file`.
#[allow(clippy::too_many_arguments)]
fn process_one_file(
    path: &Path,
    content: String,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let path_str = path.display().to_string();
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&path_str);

    let detected_format = parsers::detect_format(path);

    // JSONL is validated line-by-line and handled separately.
    if detected_format == Some(FileFormat::Jsonl) {
        return process_jsonl_file(
            path,
            &path_str,
            file_name,
            &content,
            config,
            config_dir,
            compiled_catalogs,
        );
    }

    // Files with an unrecognized extension are only considered when
    // some config or catalog mapping explicitly targets them.
    if detected_format.is_none() {
        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
            || compiled_catalogs
                .iter()
                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
        if !has_match {
            return vec![FileResult::Skip];
        }
    }

    // Parse with the detected format, or probe all formats when the
    // extension did not identify one (probe failures are a skip, not
    // an error, since the file may simply not be a config document).
    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
        let parser = parsers::parser_for(fmt);
        match parser.parse(&content, &path_str) {
            Ok(val) => (parser, val),
            Err(parse_err) => return vec![FileResult::Error(parse_err)],
        }
    } else {
        match try_parse_all(&content, &path_str) {
            Some((fmt, val)) => (parsers::parser_for(fmt), val),
            None => return vec![FileResult::Skip],
        }
    };

    // A null document carries nothing to validate.
    if instance.is_null() {
        return vec![FileResult::Skip];
    }

    // Schema precedence: inline reference in the file, then config
    // mappings, then catalog matches.
    let inline_uri = parser.extract_schema_uri(&content, &instance);
    let from_inline = inline_uri.is_some();
    let schema_uri = inline_uri
        .or_else(|| {
            config
                .find_schema_mapping(&path_str, file_name)
                .map(str::to_string)
        })
        .or_else(|| {
            compiled_catalogs
                .iter()
                .find_map(|cat| cat.find_schema(&path_str, file_name))
                .map(str::to_string)
        });

    let Some(schema_uri) = schema_uri else {
        return vec![FileResult::Skip];
    };

    // Keep the pre-rewrite URI; it participates in per-file
    // format-validation settings later in the pipeline.
    let original_schema_uri = schema_uri.clone();

    let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

    // Relative local schema paths resolve against the file's own
    // directory for inline references, or the config directory for
    // mapped ones.
    let schema_uri = resolve_local_schema_path(
        &schema_uri,
        if from_inline {
            path.parent()
        } else {
            Some(config_dir)
        },
    );

    vec![FileResult::Parsed {
        schema_uri,
        parsed: ParsedFile {
            path: path_str,
            content,
            instance,
            original_schema_uri,
        },
    }]
}
315
/// Prepare a JSONL file for validation: each line becomes its own
/// `FileResult` under a `path:line` pseudo-path.
///
/// Lines flagged by `check_schema_consistency` produce `SchemaMismatch`
/// diagnostics in addition to being processed normally.
#[allow(clippy::too_many_arguments)]
fn process_jsonl_file(
    path: &Path,
    path_str: &str,
    file_name: &str,
    content: &str,
    config: &lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: &[CompiledCatalog],
) -> Vec<FileResult> {
    let lines = match parsers::jsonl::parse_jsonl(content, path_str) {
        Ok(lines) => lines,
        Err(parse_err) => return vec![FileResult::Error(parse_err)],
    };

    if lines.is_empty() {
        return vec![FileResult::Skip];
    }

    let mut results = Vec::with_capacity(lines.len());

    // Report lines whose inline $schema is inconsistent with the file.
    if let Some(mismatches) = parsers::jsonl::check_schema_consistency(&lines) {
        for m in mismatches {
            results.push(FileResult::Error(LintelDiagnostic::SchemaMismatch {
                path: path_str.to_string(),
                line_number: m.line_number,
                message: format!("expected consistent $schema but found {}", m.schema_uri),
            }));
        }
    }

    for line in lines {
        // Same precedence as whole files: inline $schema, then config
        // mappings, then catalogs.
        let inline_uri = parsers::jsonl::extract_schema_uri(&line.value);
        let from_inline = inline_uri.is_some();
        let schema_uri = inline_uri
            .or_else(|| {
                config
                    .find_schema_mapping(path_str, file_name)
                    .map(str::to_string)
            })
            .or_else(|| {
                compiled_catalogs
                    .iter()
                    .find_map(|cat| cat.find_schema(path_str, file_name))
                    .map(str::to_string)
            });

        // Lines with no schema are simply not validated.
        let Some(schema_uri) = schema_uri else {
            continue;
        };

        let original_schema_uri = schema_uri.clone();

        let schema_uri = lintel_config::apply_rewrites(&schema_uri, &config.rewrite);
        let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);

        // Inline references resolve relative to the file; mapped ones
        // relative to the config directory.
        let schema_uri = resolve_local_schema_path(
            &schema_uri,
            if from_inline {
                path.parent()
            } else {
                Some(config_dir)
            },
        );

        // Each line is validated independently under "path:line".
        let line_path = format!("{path_str}:{}", line.line_number);

        results.push(FileResult::Parsed {
            schema_uri,
            parsed: ParsedFile {
                path: line_path,
                content: line.raw,
                instance: line.value,
                original_schema_uri,
            },
        });
    }

    if results.is_empty() {
        vec![FileResult::Skip]
    } else {
        results
    }
}
411
/// Read all `files` concurrently, capping simultaneously open files at
/// `FD_CONCURRENCY_LIMIT` via a semaphore.
///
/// Unreadable files are recorded in `errors` as `Io` diagnostics; the
/// returned pairs contain only successful reads, in task-completion
/// order (not input order).
#[tracing::instrument(skip_all, fields(file_count = files.len()))]
pub async fn read_files(
    files: &[PathBuf],
    errors: &mut Vec<LintelDiagnostic>,
) -> Vec<(PathBuf, String)> {
    let semaphore = alloc::sync::Arc::new(tokio::sync::Semaphore::new(FD_CONCURRENCY_LIMIT));
    let mut read_set = tokio::task::JoinSet::new();
    for path in files {
        let path = path.clone();
        let sem = semaphore.clone();
        read_set.spawn(async move {
            // Permit is held for the duration of the read, bounding the
            // number of concurrently open file descriptors.
            let _permit = sem.acquire().await.expect("semaphore closed");
            let result = tokio::fs::read_to_string(&path).await;
            (path, result)
        });
    }

    let mut file_contents = Vec::with_capacity(files.len());
    while let Some(result) = read_set.join_next().await {
        match result {
            Ok((path, Ok(content))) => file_contents.push((path, content)),
            Ok((path, Err(e))) => {
                errors.push(LintelDiagnostic::Io {
                    path: path.display().to_string(),
                    message: format!("failed to read: {e}"),
                });
            }
            // A panicked read task loses that one file; log and move on.
            Err(e) => tracing::warn!("file read task panicked: {e}"),
        }
    }

    file_contents
}
451
452#[tracing::instrument(skip_all, fields(file_count = file_contents.len()))]
455#[allow(clippy::too_many_arguments)]
456fn parse_and_group_contents(
457 file_contents: Vec<(PathBuf, String)>,
458 config: &lintel_config::Config,
459 config_dir: &Path,
460 compiled_catalogs: &[CompiledCatalog],
461 errors: &mut Vec<LintelDiagnostic>,
462) -> BTreeMap<String, Vec<ParsedFile>> {
463 let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
464 for (path, content) in file_contents {
465 let results = process_one_file(&path, content, config, config_dir, compiled_catalogs);
466 for result in results {
467 match result {
468 FileResult::Parsed { schema_uri, parsed } => {
469 schema_groups.entry(schema_uri).or_default().push(parsed);
470 }
471 FileResult::Error(e) => errors.push(e),
472 FileResult::Skip => {}
473 }
474 }
475 }
476
477 schema_groups
478}
479
/// Obtain the schema document for `schema_uri`.
///
/// Remote (http/https) schemas must already be present in `prefetched`;
/// local schemas are read from disk and memoized in `local_cache`. On
/// any failure every file in `group` is reported as a `SchemaFetch`
/// error and marked checked, and `None` is returned.
#[allow(clippy::too_many_arguments)]
async fn fetch_schema_from_prefetched(
    schema_uri: &str,
    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
    local_cache: &mut HashMap<String, Value>,
    group: &[ParsedFile],
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Option<(Value, Option<CacheStatus>)> {
    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");

    // Local schemas have no cache status, hence Option<CacheStatus>.
    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
        match prefetched.get(schema_uri) {
            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
            // Should not happen: all remote URIs are prefetched upfront.
            None => Err(format!("schema not prefetched: {schema_uri}")),
        }
    } else if let Some(cached) = local_cache.get(schema_uri) {
        Ok((cached.clone(), None))
    } else {
        // First use of this local schema: read, parse, and memoize.
        tokio::fs::read_to_string(schema_uri)
            .await
            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
            .and_then(|content| {
                serde_json::from_str::<Value>(&content)
                    .map(|v| {
                        local_cache.insert(schema_uri.to_string(), v.clone());
                        (v, None)
                    })
                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
            })
    };

    match result {
        Ok(value) => Some(value),
        Err(message) => {
            // The whole group shares this schema, so every member gets
            // the same fetch diagnostic.
            report_group_error(
                |path| LintelDiagnostic::SchemaFetch {
                    path: path.to_string(),
                    message: message.clone(),
                },
                schema_uri,
                None,
                group,
                errors,
                checked,
                on_check,
            );
            None
        }
    }
}
541
542#[allow(clippy::too_many_arguments)]
544fn report_group_error<P: alloc::borrow::Borrow<ParsedFile>>(
545 make_error: impl Fn(&str) -> LintelDiagnostic,
546 schema_uri: &str,
547 cache_status: Option<CacheStatus>,
548 group: &[P],
549 errors: &mut Vec<LintelDiagnostic>,
550 checked: &mut Vec<CheckedFile>,
551 on_check: &mut impl FnMut(&CheckedFile),
552) {
553 for item in group {
554 let pf = item.borrow();
555 let cf = CheckedFile {
556 path: pf.path.clone(),
557 schema: schema_uri.to_string(),
558 cache_status,
559 validation_cache_status: None,
560 };
561 on_check(&cf);
562 checked.push(cf);
563 errors.push(make_error(&pf.path));
564 }
565}
566
567#[allow(clippy::too_many_arguments)]
569fn mark_group_checked<P: alloc::borrow::Borrow<ParsedFile>>(
570 schema_uri: &str,
571 cache_status: Option<CacheStatus>,
572 validation_cache_status: Option<ValidationCacheStatus>,
573 group: &[P],
574 checked: &mut Vec<CheckedFile>,
575 on_check: &mut impl FnMut(&CheckedFile),
576) {
577 for item in group {
578 let pf = item.borrow();
579 let cf = CheckedFile {
580 path: pf.path.clone(),
581 schema: schema_uri.to_string(),
582 cache_status,
583 validation_cache_status,
584 };
585 on_check(&cf);
586 checked.push(cf);
587 }
588}
589
/// Convert one file's `ValidationError`s (fresh or cache-replayed) into
/// `LintelDiagnostic::Validation` entries appended to `errors`.
///
/// When the schema document is available, additional-property errors
/// get a "did you mean" suggestion derived from it.
#[allow(clippy::too_many_arguments)]
fn push_validation_errors(
    pf: &ParsedFile,
    schema_url: &str,
    validation_errors: &[ValidationError],
    errors: &mut Vec<LintelDiagnostic>,
    schema: Option<&Value>,
) {
    for ve in validation_errors {
        // Root-level errors get the default label instead of "".
        let instance_path = if ve.instance_path.is_empty() {
            DEFAULT_LABEL.to_string()
        } else {
            ve.instance_path.clone()
        };
        let label = format_label(&instance_path, &ve.schema_path);
        let source_span: miette::SourceSpan = ve.span.into();
        let mut message = ve.kind.message();
        if let ValidationErrorKind::AdditionalProperty { ref property } = ve.kind
            && let Some(s) = schema
            && let Some(suggestion) = suggest::suggest_property(property, &ve.schema_path, s)
        {
            message = format!("{message}; did you mean '{suggestion}'?");
        }
        errors.push(LintelDiagnostic::Validation(ValidationDiagnostic {
            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
            span: source_span,
            schema_span: source_span,
            path: pf.path.clone(),
            instance_path,
            label,
            message,
            schema_url: schema_url.to_string(),
            schema_path: ve.schema_path.clone(),
            validation_code: format!("validation({})", ve.kind.as_ref()),
        }));
    }
}
628
/// Map a `jsonschema` error kind onto lintel's own (serializable)
/// `ValidationErrorKind`.
///
/// `AdditionalProperties` is expanded per-property in `convert_error`
/// and must never reach this function.
fn convert_kind(kind: &jsonschema::error::ValidationErrorKind) -> ValidationErrorKind {
    use jsonschema::error::{TypeKind, ValidationErrorKind as JK};

    match kind {
        JK::AdditionalItems { limit } => ValidationErrorKind::AdditionalItems { limit: *limit },
        JK::AdditionalProperties { .. } => unreachable!("handled in convert_error"),
        JK::AnyOf { .. } => ValidationErrorKind::AnyOf,
        JK::BacktrackLimitExceeded { error } => ValidationErrorKind::BacktrackLimitExceeded {
            message: error.to_string(),
        },
        JK::Constant { expected_value } => ValidationErrorKind::Constant {
            expected_value: expected_value.clone(),
        },
        JK::Contains => ValidationErrorKind::Contains,
        JK::ContentEncoding { content_encoding } => ValidationErrorKind::ContentEncoding {
            content_encoding: content_encoding.clone(),
        },
        JK::ContentMediaType { content_media_type } => ValidationErrorKind::ContentMediaType {
            content_media_type: content_media_type.clone(),
        },
        JK::Custom { keyword, message } => ValidationErrorKind::Custom {
            keyword: keyword.clone(),
            message: message.clone(),
        },
        JK::Enum { options } => ValidationErrorKind::Enum {
            options: options.clone(),
        },
        JK::ExclusiveMaximum { limit } => ValidationErrorKind::ExclusiveMaximum {
            limit: limit.clone(),
        },
        JK::ExclusiveMinimum { limit } => ValidationErrorKind::ExclusiveMinimum {
            limit: limit.clone(),
        },
        JK::FalseSchema => ValidationErrorKind::FalseSchema,
        JK::Format { format } => ValidationErrorKind::Format {
            format: format.clone(),
        },
        JK::FromUtf8 { error } => ValidationErrorKind::FromUtf8 {
            message: error.to_string(),
        },
        JK::MaxItems { limit } => ValidationErrorKind::MaxItems { limit: *limit },
        JK::Maximum { limit } => ValidationErrorKind::Maximum {
            limit: limit.clone(),
        },
        JK::MaxLength { limit } => ValidationErrorKind::MaxLength { limit: *limit },
        JK::MaxProperties { limit } => ValidationErrorKind::MaxProperties { limit: *limit },
        JK::MinItems { limit } => ValidationErrorKind::MinItems { limit: *limit },
        JK::Minimum { limit } => ValidationErrorKind::Minimum {
            limit: limit.clone(),
        },
        JK::MinLength { limit } => ValidationErrorKind::MinLength { limit: *limit },
        JK::MinProperties { limit } => ValidationErrorKind::MinProperties { limit: *limit },
        JK::MultipleOf { multiple_of } => ValidationErrorKind::MultipleOf {
            multiple_of: *multiple_of,
        },
        JK::Not { .. } => ValidationErrorKind::Not,
        JK::OneOfMultipleValid { .. } => ValidationErrorKind::OneOfMultipleValid,
        JK::OneOfNotValid { .. } => ValidationErrorKind::OneOfNotValid,
        JK::Pattern { pattern } => ValidationErrorKind::Pattern {
            pattern: pattern.clone(),
        },
        JK::PropertyNames { error } => ValidationErrorKind::PropertyNames {
            message: error.to_string(),
        },
        JK::Required { property } => ValidationErrorKind::Required {
            // String property names are quoted for display; any other
            // JSON value is rendered verbatim.
            property: match property {
                Value::String(s) => format!("\"{s}\""),
                other => other.to_string(),
            },
        },
        JK::Type { kind } => {
            let expected = match kind {
                TypeKind::Single(t) => t.to_string(),
                TypeKind::Multiple(ts) => {
                    let parts: Vec<String> = ts.iter().map(|t| t.to_string()).collect();
                    parts.join(", ")
                }
            };
            ValidationErrorKind::Type { expected }
        }
        JK::UnevaluatedItems { unexpected } => ValidationErrorKind::UnevaluatedItems {
            unexpected: unexpected.clone(),
        },
        JK::UnevaluatedProperties { unexpected } => ValidationErrorKind::UnevaluatedProperties {
            unexpected: unexpected.clone(),
        },
        JK::UniqueItems => ValidationErrorKind::UniqueItems,
        JK::Referencing(err) => ValidationErrorKind::Referencing {
            message: err.to_string(),
        },
    }
}
724
/// Convert one `jsonschema` validation error into lintel's form,
/// locating source spans within `content`.
///
/// `AdditionalProperties` errors fan out into one error per offending
/// property so that each gets its own span (and, later, its own
/// "did you mean" suggestion).
fn convert_error(error: &jsonschema::ValidationError<'_>, content: &str) -> Vec<ValidationError> {
    use jsonschema::error::ValidationErrorKind as JK;

    let schema_path = error.schema_path().to_string();
    let base_instance_path = error.instance_path().to_string();

    if let JK::AdditionalProperties { unexpected } = error.kind() {
        return unexpected
            .iter()
            .map(|prop| {
                let instance_path = format!("{base_instance_path}/{prop}");
                let span = find_instance_path_span(content, &instance_path);
                ValidationError {
                    instance_path,
                    schema_path: schema_path.clone(),
                    kind: ValidationErrorKind::AdditionalProperty {
                        property: prop.clone(),
                    },
                    span,
                }
            })
            .collect();
    }

    let span = find_instance_path_span(content, &base_instance_path);
    vec![ValidationError {
        instance_path: base_instance_path,
        schema_path,
        kind: convert_kind(error.kind()),
        span,
    }]
}
761
/// Validate every file in `group` against an already-compiled
/// `validator`, store each result in the validation cache, and mark
/// each file checked with a validation-cache `Miss` status.
#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
#[allow(clippy::too_many_arguments)]
async fn validate_group<P: alloc::borrow::Borrow<ParsedFile>>(
    validator: &jsonschema::Validator,
    schema_uri: &str,
    schema_hash: &str,
    validate_formats: bool,
    cache_status: Option<CacheStatus>,
    group: &[P],
    schema_value: &Value,
    vcache: &lintel_validation_cache::ValidationCache,
    errors: &mut Vec<LintelDiagnostic>,
    checked: &mut Vec<CheckedFile>,
    on_check: &mut impl FnMut(&CheckedFile),
) {
    for item in group {
        let pf = item.borrow();
        // Flatten jsonschema errors into lintel's span-annotated form.
        let file_errors: Vec<ValidationError> = validator
            .iter_errors(&pf.instance)
            .flat_map(|error| convert_error(&error, &pf.content))
            .collect();

        // Store even an empty error list so clean files hit the cache
        // on the next run.
        vcache
            .store(
                &lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash,
                    validate_formats,
                },
                &file_errors,
            )
            .await;
        push_validation_errors(pf, schema_uri, &file_errors, errors, Some(schema_value));

        let cf = CheckedFile {
            path: pf.path.clone(),
            schema: schema_uri.to_string(),
            cache_status,
            validation_cache_status: Some(ValidationCacheStatus::Miss),
        };
        on_check(&cf);
        checked.push(cf);
    }
}
808
/// Fetch and compile all schema catalogs concurrently: user-configured
/// registries first, then the default lintel registry (unless the
/// config disables it), then the SchemaStore catalog.
///
/// Failed fetches print a warning and are dropped. Despite running
/// concurrently, results are returned in the precedence order above.
/// Returns an empty list when `no_catalog` is set.
pub async fn fetch_compiled_catalogs(
    retriever: &SchemaCache,
    config: &lintel_config::Config,
    no_catalog: bool,
) -> Vec<CompiledCatalog> {
    let mut compiled_catalogs = Vec::new();

    if !no_catalog {
        let catalog_span = tracing::info_span!("fetch_catalogs").entered();

        // (precedence index, human-readable label, fetch result)
        #[allow(clippy::items_after_statements)]
        type CatalogResult = (
            usize, String,
            Result<CompiledCatalog, Box<dyn core::error::Error + Send + Sync>>,
        );
        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();

        for (i, registry_url) in config.registries.iter().enumerate() {
            let r = retriever.clone();
            let url = registry_url.clone();
            let label = format!("registry {url}");
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, &url)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (i, label, result)
            });
        }

        // Default registry sorts after all user-configured registries.
        let lintel_order = config.registries.len();
        if !config.no_default_catalog {
            let r = retriever.clone();
            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
            catalog_tasks.spawn(async move {
                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
                    .await
                    .map(|cat| CompiledCatalog::compile(&cat));
                (lintel_order, label, result)
            });
        }

        // SchemaStore always has the lowest precedence.
        let schemastore_order = config.registries.len() + 1;
        let r = retriever.clone();
        catalog_tasks.spawn(async move {
            let result = catalog::fetch_catalog(&r)
                .await
                .map(|cat| CompiledCatalog::compile(&cat));
            (schemastore_order, "SchemaStore catalog".to_string(), result)
        });

        let mut results: Vec<(usize, CompiledCatalog)> = Vec::new();
        while let Some(result) = catalog_tasks.join_next().await {
            match result {
                Ok((order, _, Ok(compiled))) => results.push((order, compiled)),
                Ok((_, label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
            }
        }
        // Tasks finish in arbitrary order; restore precedence order.
        results.sort_by_key(|(order, _)| *order);
        compiled_catalogs.extend(results.into_iter().map(|(_, cat)| cat));

        drop(catalog_span);
    }

    compiled_catalogs
}
889
/// Validate the files matched by `args` with a default schema cache
/// and no per-file progress callback.
pub async fn run(args: &ValidateArgs) -> Result<CheckResult> {
    run_with(args, None, |_| {}).await
}
896
/// Validate the files matched by `args.globs`, optionally reusing an
/// existing `SchemaCache`; `on_check` fires once per checked file.
pub async fn run_with(
    args: &ValidateArgs,
    cache: Option<SchemaCache>,
    on_check: impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let files = collect_files(&args.globs, &args.exclude)?;
    run_with_files(args, cache, files, on_check).await
}
911
/// Validate a pre-collected list of `files`.
///
/// Loads config, fetches catalogs, reads the files concurrently, then
/// delegates to the shared pipeline. `on_check` fires once per checked
/// file as results are produced.
#[tracing::instrument(skip_all, name = "validate")]
#[allow(clippy::too_many_lines)]
pub async fn run_with_files(
    args: &ValidateArgs,
    cache: Option<SchemaCache>,
    files: Vec<PathBuf>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let retriever = build_retriever(args, cache);
    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    tracing::info!(file_count = files.len(), "collected files");

    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;

    // Read errors are carried into the pipeline alongside later
    // parse/validation diagnostics.
    let mut errors: Vec<LintelDiagnostic> = Vec::new();
    let file_contents = read_files(&files, &mut errors).await;

    run_with_contents_inner(
        file_contents,
        args,
        retriever,
        config,
        &config_dir,
        compiled_catalogs,
        errors,
        &mut on_check,
    )
    .await
}
949
/// Validate already-read `(path, content)` pairs (e.g. from in-memory
/// buffers), skipping the filesystem read stage entirely.
pub async fn run_with_contents(
    args: &ValidateArgs,
    file_contents: Vec<(PathBuf, String)>,
    cache: Option<SchemaCache>,
    mut on_check: impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let retriever = build_retriever(args, cache);
    let (config, config_dir, _config_path) = load_config(args.config_dir.as_deref());
    let compiled_catalogs = fetch_compiled_catalogs(&retriever, &config, args.no_catalog).await;
    // No read stage, so the pipeline starts with no pending errors.
    let errors: Vec<LintelDiagnostic> = Vec::new();

    run_with_contents_inner(
        file_contents,
        args,
        retriever,
        config,
        &config_dir,
        compiled_catalogs,
        errors,
        &mut on_check,
    )
    .await
}
980
981fn build_retriever(args: &ValidateArgs, cache: Option<SchemaCache>) -> SchemaCache {
982 if let Some(c) = cache {
983 return c;
984 }
985 let mut builder = SchemaCache::builder().force_fetch(args.force_schema_fetch);
986 if let Some(dir) = &args.cache_dir {
987 let path = PathBuf::from(dir);
988 let _ = fs::create_dir_all(&path);
989 builder = builder.cache_dir(path);
990 }
991 if let Some(ttl) = args.schema_cache_ttl {
992 builder = builder.ttl(ttl);
993 }
994 builder.build()
995}
996
/// Core validation pipeline shared by every public entry point.
///
/// Stages: group files by resolved schema URI → prefetch all remote
/// schemas concurrently → for each schema group: replay validation-
/// cache hits, compile the schema, validate the cache misses. Errors
/// are sorted by path and source offset before returning.
#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
async fn run_with_contents_inner(
    file_contents: Vec<(PathBuf, String)>,
    args: &ValidateArgs,
    retriever: SchemaCache,
    config: lintel_config::Config,
    config_dir: &Path,
    compiled_catalogs: Vec<CompiledCatalog>,
    mut errors: Vec<LintelDiagnostic>,
    on_check: &mut impl FnMut(&CheckedFile),
) -> Result<CheckResult> {
    let mut checked: Vec<CheckedFile> = Vec::new();

    // Group files by schema URI so each schema is fetched/compiled at
    // most once; BTreeMap keeps iteration order deterministic.
    let schema_groups = parse_and_group_contents(
        file_contents,
        &config,
        config_dir,
        &compiled_catalogs,
        &mut errors,
    );
    tracing::info!(
        schema_count = schema_groups.len(),
        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
        "grouped files by schema"
    );

    let vcache = lintel_validation_cache::ValidationCache::new(
        lintel_validation_cache::ensure_cache_dir(),
        args.force_validation,
    );

    // Kick off all remote schema fetches concurrently before the
    // sequential per-group loop below.
    let remote_uris: Vec<&String> = schema_groups
        .keys()
        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
        .collect();

    let prefetched = {
        let _prefetch_span =
            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();

        let mut schema_tasks = tokio::task::JoinSet::new();
        for uri in remote_uris {
            let r = retriever.clone();
            let u = uri.clone();
            schema_tasks.spawn(async move {
                let result = r.fetch(&u).await;
                (u, result)
            });
        }

        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
        while let Some(result) = schema_tasks.join_next().await {
            match result {
                Ok((uri, fetch_result)) => {
                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
                }
                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
            }
        }

        prefetched
    };

    // Memoization for local schema files plus per-phase timing
    // accumulators reported at the end.
    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
    let mut fetch_time = core::time::Duration::ZERO;
    let mut hash_time = core::time::Duration::ZERO;
    let mut vcache_time = core::time::Duration::ZERO;
    let mut compile_time = core::time::Duration::ZERO;
    let mut validate_time = core::time::Duration::ZERO;

    for (schema_uri, group) in &schema_groups {
        let _group_span = tracing::debug_span!(
            "schema_group",
            schema = schema_uri.as_str(),
            files = group.len(),
        )
        .entered();

        // Format assertions are only validated if every file in the
        // group opts in (config can disable per path or schema URI).
        let validate_formats = group.iter().all(|pf| {
            config
                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
        });

        let t = std::time::Instant::now();
        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
            schema_uri,
            &prefetched,
            &mut local_schema_cache,
            group,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await
        else {
            // Fetch failed: diagnostics were already recorded for the
            // whole group inside the helper.
            fetch_time += t.elapsed();
            continue;
        };
        fetch_time += t.elapsed();

        let t = std::time::Instant::now();
        let schema_hash = lintel_validation_cache::schema_hash(&schema_value);
        hash_time += t.elapsed();

        let mut cache_misses: Vec<&ParsedFile> = Vec::new();

        // Replay cached validation results; collect misses for real
        // validation below.
        let t = std::time::Instant::now();
        for pf in group {
            let (cached, vcache_status) = vcache
                .lookup(&lintel_validation_cache::CacheKey {
                    file_content: &pf.content,
                    schema_hash: &schema_hash,
                    validate_formats,
                })
                .await;

            if let Some(cached_errors) = cached {
                push_validation_errors(
                    pf,
                    schema_uri,
                    &cached_errors,
                    &mut errors,
                    Some(&schema_value),
                );
                let cf = CheckedFile {
                    path: pf.path.clone(),
                    schema: schema_uri.clone(),
                    cache_status,
                    validation_cache_status: Some(vcache_status),
                };
                on_check(&cf);
                checked.push(cf);
            } else {
                cache_misses.push(pf);
            }
        }
        vcache_time += t.elapsed();

        tracing::debug!(
            cache_hits = group.len() - cache_misses.len(),
            cache_misses = cache_misses.len(),
            "validation cache"
        );

        // Every file was served from cache: no need to compile.
        if cache_misses.is_empty() {
            continue;
        }

        // Compile the schema. Remote schemas keep their URI (minus any
        // fragment) as the base so relative $refs resolve over HTTP;
        // local schemas get a canonicalized file:// base.
        let t = std::time::Instant::now();
        let validator = {
            let is_remote_schema =
                schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
            let local_retriever = LocalRetriever {
                http: retriever.clone(),
            };
            let opts = jsonschema::async_options()
                .with_retriever(local_retriever)
                .should_validate_formats(validate_formats);
            let base_uri = if is_remote_schema {
                // Strip any fragment before using the URI as a base.
                let uri = match schema_uri.find('#') {
                    Some(pos) => schema_uri[..pos].to_string(),
                    None => schema_uri.clone(),
                };
                Some(uri)
            } else {
                std::fs::canonicalize(schema_uri)
                    .ok()
                    .map(|p| format!("file://{}", p.display()))
            };
            let opts = if let Some(uri) = base_uri {
                opts.with_base_uri(uri)
            } else {
                opts
            };
            match opts.build(&schema_value).await {
                Ok(v) => v,
                Err(e) => {
                    compile_time += t.elapsed();
                    // NOTE(review): compile errors mentioning
                    // "uri-reference" are swallowed when format
                    // validation is off — presumably a known quirk of
                    // compiling such schemas; confirm before changing.
                    if !validate_formats && e.to_string().contains("uri-reference") {
                        mark_group_checked(
                            schema_uri,
                            cache_status,
                            Some(ValidationCacheStatus::Miss),
                            &cache_misses,
                            &mut checked,
                            on_check,
                        );
                        continue;
                    }
                    let msg = format!("failed to compile schema: {e}");
                    report_group_error(
                        |path| LintelDiagnostic::SchemaCompile {
                            path: path.to_string(),
                            message: msg.clone(),
                        },
                        schema_uri,
                        cache_status,
                        &cache_misses,
                        &mut errors,
                        &mut checked,
                        on_check,
                    );
                    continue;
                }
            }
        };
        compile_time += t.elapsed();

        let t = std::time::Instant::now();
        validate_group(
            &validator,
            schema_uri,
            &schema_hash,
            validate_formats,
            cache_status,
            &cache_misses,
            &schema_value,
            &vcache,
            &mut errors,
            &mut checked,
            on_check,
        )
        .await;
        validate_time += t.elapsed();
    }

    #[allow(clippy::cast_possible_truncation)]
    {
        tracing::info!(
            fetch_ms = fetch_time.as_millis() as u64,
            hash_ms = hash_time.as_millis() as u64,
            vcache_ms = vcache_time.as_millis() as u64,
            compile_ms = compile_time.as_millis() as u64,
            validate_ms = validate_time.as_millis() as u64,
            "phase2 breakdown"
        );
    }

    // Deterministic output order: by path, then by source offset.
    errors.sort_by(|a, b| {
        a.path()
            .cmp(b.path())
            .then_with(|| a.offset().cmp(&b.offset()))
    });

    Ok(CheckResult { errors, checked })
}
1263
1264#[cfg(test)]
1265mod tests {
1266 use super::*;
1267 use lintel_schema_cache::SchemaCache;
1268 use std::path::Path;
1269
1270 fn mock(entries: &[(&str, &str)]) -> SchemaCache {
1271 let cache = SchemaCache::memory();
1272 for (uri, body) in entries {
1273 cache.insert(
1274 uri,
1275 serde_json::from_str(body).expect("test mock: invalid JSON"),
1276 );
1277 }
1278 cache
1279 }
1280
1281 fn testdata() -> PathBuf {
1282 Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
1283 }
1284
1285 fn scenario_globs(dirs: &[&str]) -> Vec<String> {
1287 dirs.iter()
1288 .flat_map(|dir| {
1289 let base = testdata().join(dir);
1290 vec![
1291 base.join("*.json").to_string_lossy().to_string(),
1292 base.join("*.yaml").to_string_lossy().to_string(),
1293 base.join("*.yml").to_string_lossy().to_string(),
1294 base.join("*.json5").to_string_lossy().to_string(),
1295 base.join("*.jsonc").to_string_lossy().to_string(),
1296 base.join("*.toml").to_string_lossy().to_string(),
1297 ]
1298 })
1299 .collect()
1300 }
1301
1302 fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
1303 ValidateArgs {
1304 globs: scenario_globs(dirs),
1305 exclude: vec![],
1306 cache_dir: None,
1307 force_schema_fetch: true,
1308 force_validation: true,
1309 no_catalog: true,
1310 config_dir: None,
1311 schema_cache_ttl: None,
1312 }
1313 }
1314
    // Minimal object schema used by most tests: a single required string
    // property "name".
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;

    // Cache preloaded with SCHEMA under the canonical remote test URI.
    fn schema_mock() -> SchemaCache {
        mock(&[("https://example.com/schema.json", SCHEMA)])
    }
1321
1322 #[tokio::test]
1325 async fn no_matching_files() -> anyhow::Result<()> {
1326 let tmp = tempfile::tempdir()?;
1327 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1328 let c = ValidateArgs {
1329 globs: vec![pattern],
1330 exclude: vec![],
1331 cache_dir: None,
1332 force_schema_fetch: true,
1333 force_validation: true,
1334 no_catalog: true,
1335 config_dir: None,
1336 schema_cache_ttl: None,
1337 };
1338 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1339 assert!(!result.has_errors());
1340 Ok(())
1341 }
1342
1343 #[tokio::test]
1344 async fn dir_all_valid() -> anyhow::Result<()> {
1345 let c = args_for_dirs(&["positive_tests"]);
1346 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1347 assert!(!result.has_errors());
1348 Ok(())
1349 }
1350
1351 #[tokio::test]
1352 async fn dir_all_invalid() -> anyhow::Result<()> {
1353 let c = args_for_dirs(&["negative_tests"]);
1354 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1355 assert!(result.has_errors());
1356 Ok(())
1357 }
1358
1359 #[tokio::test]
1360 async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1361 let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1362 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1363 assert!(result.has_errors());
1364 Ok(())
1365 }
1366
1367 #[tokio::test]
1368 async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1369 let c = args_for_dirs(&["no_schema"]);
1370 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1371 assert!(!result.has_errors());
1372 Ok(())
1373 }
1374
1375 #[tokio::test]
1376 async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1377 let c = args_for_dirs(&["positive_tests", "no_schema"]);
1378 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1379 assert!(!result.has_errors());
1380 Ok(())
1381 }
1382
1383 #[tokio::test]
1386 async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1387 let dir = testdata().join("positive_tests");
1388 let c = ValidateArgs {
1389 globs: vec![dir.to_string_lossy().to_string()],
1390 exclude: vec![],
1391 cache_dir: None,
1392 force_schema_fetch: true,
1393 force_validation: true,
1394 no_catalog: true,
1395 config_dir: None,
1396 schema_cache_ttl: None,
1397 };
1398 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1399 assert!(!result.has_errors());
1400 assert!(result.files_checked() > 0);
1401 Ok(())
1402 }
1403
1404 #[tokio::test]
1405 async fn multiple_directory_args() -> anyhow::Result<()> {
1406 let pos_dir = testdata().join("positive_tests");
1407 let no_schema_dir = testdata().join("no_schema");
1408 let c = ValidateArgs {
1409 globs: vec![
1410 pos_dir.to_string_lossy().to_string(),
1411 no_schema_dir.to_string_lossy().to_string(),
1412 ],
1413 exclude: vec![],
1414 cache_dir: None,
1415 force_schema_fetch: true,
1416 force_validation: true,
1417 no_catalog: true,
1418 config_dir: None,
1419 schema_cache_ttl: None,
1420 };
1421 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1422 assert!(!result.has_errors());
1423 Ok(())
1424 }
1425
1426 #[tokio::test]
1427 async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1428 let dir = testdata().join("positive_tests");
1429 let glob_pattern = testdata()
1430 .join("no_schema")
1431 .join("*.json")
1432 .to_string_lossy()
1433 .to_string();
1434 let c = ValidateArgs {
1435 globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1436 exclude: vec![],
1437 cache_dir: None,
1438 force_schema_fetch: true,
1439 force_validation: true,
1440 no_catalog: true,
1441 config_dir: None,
1442 schema_cache_ttl: None,
1443 };
1444 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1445 assert!(!result.has_errors());
1446 Ok(())
1447 }
1448
1449 #[tokio::test]
1450 async fn malformed_json_parse_error() -> anyhow::Result<()> {
1451 let base = testdata().join("malformed");
1452 let c = ValidateArgs {
1453 globs: vec![base.join("*.json").to_string_lossy().to_string()],
1454 exclude: vec![],
1455 cache_dir: None,
1456 force_schema_fetch: true,
1457 force_validation: true,
1458 no_catalog: true,
1459 config_dir: None,
1460 schema_cache_ttl: None,
1461 };
1462 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1463 assert!(result.has_errors());
1464 Ok(())
1465 }
1466
1467 #[tokio::test]
1468 async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1469 let base = testdata().join("malformed");
1470 let c = ValidateArgs {
1471 globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1472 exclude: vec![],
1473 cache_dir: None,
1474 force_schema_fetch: true,
1475 force_validation: true,
1476 no_catalog: true,
1477 config_dir: None,
1478 schema_cache_ttl: None,
1479 };
1480 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1481 assert!(result.has_errors());
1482 Ok(())
1483 }
1484
1485 #[tokio::test]
1488 async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1489 let base = testdata().join("negative_tests");
1490 let c = ValidateArgs {
1491 globs: scenario_globs(&["positive_tests", "negative_tests"]),
1492 exclude: vec![
1493 base.join("missing_name.json").to_string_lossy().to_string(),
1494 base.join("missing_name.toml").to_string_lossy().to_string(),
1495 base.join("missing_name.yaml").to_string_lossy().to_string(),
1496 ],
1497 cache_dir: None,
1498 force_schema_fetch: true,
1499 force_validation: true,
1500 no_catalog: true,
1501 config_dir: None,
1502 schema_cache_ttl: None,
1503 };
1504 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1505 assert!(!result.has_errors());
1506 Ok(())
1507 }
1508
1509 #[tokio::test]
1512 async fn custom_cache_dir() -> anyhow::Result<()> {
1513 let c = ValidateArgs {
1514 globs: scenario_globs(&["positive_tests"]),
1515 exclude: vec![],
1516 cache_dir: None,
1517 force_schema_fetch: true,
1518 force_validation: true,
1519 no_catalog: true,
1520 config_dir: None,
1521 schema_cache_ttl: None,
1522 };
1523 let result = run_with(&c, Some(schema_mock()), |_| {}).await?;
1524 assert!(!result.has_errors());
1525 Ok(())
1526 }
1527
1528 #[tokio::test]
1531 async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1532 let tmp = tempfile::tempdir()?;
1533 let schema_path = tmp.path().join("schema.json");
1534 fs::write(&schema_path, SCHEMA)?;
1535
1536 let f = tmp.path().join("valid.json");
1537 fs::write(
1538 &f,
1539 format!(
1540 r#"{{"$schema":"{}","name":"hello"}}"#,
1541 schema_path.to_string_lossy()
1542 ),
1543 )?;
1544
1545 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1546 let c = ValidateArgs {
1547 globs: vec![pattern],
1548 exclude: vec![],
1549 cache_dir: None,
1550 force_schema_fetch: true,
1551 force_validation: true,
1552 no_catalog: true,
1553 config_dir: None,
1554 schema_cache_ttl: None,
1555 };
1556 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1557 assert!(!result.has_errors());
1558 Ok(())
1559 }
1560
1561 #[tokio::test]
1562 async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1563 let tmp = tempfile::tempdir()?;
1564 let schema_path = tmp.path().join("schema.json");
1565 fs::write(&schema_path, SCHEMA)?;
1566
1567 let f = tmp.path().join("valid.yaml");
1568 fs::write(
1569 &f,
1570 format!(
1571 "# yaml-language-server: $schema={}\nname: hello\n",
1572 schema_path.to_string_lossy()
1573 ),
1574 )?;
1575
1576 let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1577 let c = ValidateArgs {
1578 globs: vec![pattern],
1579 exclude: vec![],
1580 cache_dir: None,
1581 force_schema_fetch: true,
1582 force_validation: true,
1583 no_catalog: true,
1584 config_dir: None,
1585 schema_cache_ttl: None,
1586 };
1587 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1588 assert!(!result.has_errors());
1589 Ok(())
1590 }
1591
1592 #[tokio::test]
1593 async fn missing_local_schema_errors() -> anyhow::Result<()> {
1594 let tmp = tempfile::tempdir()?;
1595 let f = tmp.path().join("ref.json");
1596 fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1597
1598 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1599 let c = ValidateArgs {
1600 globs: vec![pattern],
1601 exclude: vec![],
1602 cache_dir: None,
1603 force_schema_fetch: true,
1604 force_validation: true,
1605 no_catalog: true,
1606 config_dir: None,
1607 schema_cache_ttl: None,
1608 };
1609 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1610 assert!(result.has_errors());
1611 Ok(())
1612 }
1613
1614 #[tokio::test]
1617 async fn json5_valid_with_schema() -> anyhow::Result<()> {
1618 let tmp = tempfile::tempdir()?;
1619 let schema_path = tmp.path().join("schema.json");
1620 fs::write(&schema_path, SCHEMA)?;
1621
1622 let f = tmp.path().join("config.json5");
1623 fs::write(
1624 &f,
1625 format!(
1626 r#"{{
1627 // JSON5 comment
1628 "$schema": "{}",
1629 name: "hello",
1630}}"#,
1631 schema_path.to_string_lossy()
1632 ),
1633 )?;
1634
1635 let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1636 let c = ValidateArgs {
1637 globs: vec![pattern],
1638 exclude: vec![],
1639 cache_dir: None,
1640 force_schema_fetch: true,
1641 force_validation: true,
1642 no_catalog: true,
1643 config_dir: None,
1644 schema_cache_ttl: None,
1645 };
1646 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1647 assert!(!result.has_errors());
1648 Ok(())
1649 }
1650
1651 #[tokio::test]
1652 async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1653 let tmp = tempfile::tempdir()?;
1654 let schema_path = tmp.path().join("schema.json");
1655 fs::write(&schema_path, SCHEMA)?;
1656
1657 let f = tmp.path().join("config.jsonc");
1658 fs::write(
1659 &f,
1660 format!(
1661 r#"{{
1662 /* JSONC comment */
1663 "$schema": "{}",
1664 "name": "hello"
1665}}"#,
1666 schema_path.to_string_lossy()
1667 ),
1668 )?;
1669
1670 let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1671 let c = ValidateArgs {
1672 globs: vec![pattern],
1673 exclude: vec![],
1674 cache_dir: None,
1675 force_schema_fetch: true,
1676 force_validation: true,
1677 no_catalog: true,
1678 config_dir: None,
1679 schema_cache_ttl: None,
1680 };
1681 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1682 assert!(!result.has_errors());
1683 Ok(())
1684 }
1685
    // Loose stand-in for the real GitHub workflow schema: only requires "on"
    // and "jobs", and checks that "jobs" is an object.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;

    // Minimal SchemaStore-style catalog mapping workflow YAML paths to the
    // mock workflow schema URL above.
    fn gh_catalog_json() -> String {
        r#"{"version":1,"schemas":[{
        "name": "GitHub Workflow",
        "description": "GitHub Actions workflow",
        "url": "https://www.schemastore.org/github-workflow.json",
        "fileMatch": [
            "**/.github/workflows/*.yml",
            "**/.github/workflows/*.yaml"
        ]
    }]}"#
        .to_string()
    }
1710
1711 #[tokio::test]
1712 async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1713 let tmp = tempfile::tempdir()?;
1714 let cache_tmp = tempfile::tempdir()?;
1715 let wf_dir = tmp.path().join(".github/workflows");
1716 fs::create_dir_all(&wf_dir)?;
1717 fs::write(
1718 wf_dir.join("ci.yml"),
1719 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1720 )?;
1721
1722 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1723 let client = mock(&[
1724 (
1725 "https://www.schemastore.org/api/json/catalog.json",
1726 &gh_catalog_json(),
1727 ),
1728 (
1729 "https://www.schemastore.org/github-workflow.json",
1730 GH_WORKFLOW_SCHEMA,
1731 ),
1732 ]);
1733 let c = ValidateArgs {
1734 globs: vec![pattern],
1735 exclude: vec![],
1736 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1737 force_schema_fetch: true,
1738 force_validation: true,
1739 no_catalog: false,
1740 config_dir: None,
1741 schema_cache_ttl: None,
1742 };
1743 let result = run_with(&c, Some(client), |_| {}).await?;
1744 assert!(!result.has_errors());
1745 Ok(())
1746 }
1747
1748 #[tokio::test]
1749 async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1750 let tmp = tempfile::tempdir()?;
1751 let cache_tmp = tempfile::tempdir()?;
1752 let wf_dir = tmp.path().join(".github/workflows");
1753 fs::create_dir_all(&wf_dir)?;
1754 fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1755
1756 let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1757 let client = mock(&[
1758 (
1759 "https://www.schemastore.org/api/json/catalog.json",
1760 &gh_catalog_json(),
1761 ),
1762 (
1763 "https://www.schemastore.org/github-workflow.json",
1764 GH_WORKFLOW_SCHEMA,
1765 ),
1766 ]);
1767 let c = ValidateArgs {
1768 globs: vec![pattern],
1769 exclude: vec![],
1770 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1771 force_schema_fetch: true,
1772 force_validation: true,
1773 no_catalog: false,
1774 config_dir: None,
1775 schema_cache_ttl: None,
1776 };
1777 let result = run_with(&c, Some(client), |_| {}).await?;
1778 assert!(result.has_errors());
1779 Ok(())
1780 }
1781
1782 #[tokio::test]
1783 async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1784 let tmp = tempfile::tempdir()?;
1785 let cache_tmp = tempfile::tempdir()?;
1786 let wf_dir = tmp.path().join(".github/workflows");
1787 fs::create_dir_all(&wf_dir)?;
1788 fs::write(
1789 wf_dir.join("ci.yml"),
1790 "name: CI\non: push\njobs:\n build:\n runs-on: ubuntu-latest\n steps: []\n",
1791 )?;
1792
1793 let client = mock(&[
1794 (
1795 "https://www.schemastore.org/api/json/catalog.json",
1796 &gh_catalog_json(),
1797 ),
1798 (
1799 "https://www.schemastore.org/github-workflow.json",
1800 GH_WORKFLOW_SCHEMA,
1801 ),
1802 ]);
1803 let c = ValidateArgs {
1804 globs: vec![],
1805 exclude: vec![],
1806 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1807 force_schema_fetch: true,
1808 force_validation: true,
1809 no_catalog: false,
1810 config_dir: None,
1811 schema_cache_ttl: None,
1812 };
1813
1814 let orig_dir = std::env::current_dir()?;
1815 std::env::set_current_dir(tmp.path())?;
1816 let result = run_with(&c, Some(client), |_| {}).await?;
1817 std::env::set_current_dir(orig_dir)?;
1818
1819 assert!(!result.has_errors());
1820 Ok(())
1821 }
1822
1823 #[tokio::test]
1826 async fn toml_valid_with_schema() -> anyhow::Result<()> {
1827 let tmp = tempfile::tempdir()?;
1828 let schema_path = tmp.path().join("schema.json");
1829 fs::write(&schema_path, SCHEMA)?;
1830
1831 let f = tmp.path().join("config.toml");
1832 fs::write(
1833 &f,
1834 format!(
1835 "# :schema {}\nname = \"hello\"\n",
1836 schema_path.to_string_lossy()
1837 ),
1838 )?;
1839
1840 let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1841 let c = ValidateArgs {
1842 globs: vec![pattern],
1843 exclude: vec![],
1844 cache_dir: None,
1845 force_schema_fetch: true,
1846 force_validation: true,
1847 no_catalog: true,
1848 config_dir: None,
1849 schema_cache_ttl: None,
1850 };
1851 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1852 assert!(!result.has_errors());
1853 Ok(())
1854 }
1855
1856 #[tokio::test]
1859 async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1860 let tmp = tempfile::tempdir()?;
1861
1862 let schemas_dir = tmp.path().join("schemas");
1863 fs::create_dir_all(&schemas_dir)?;
1864 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1865
1866 fs::write(
1867 tmp.path().join("lintel.toml"),
1868 r#"
1869[rewrite]
1870"http://localhost:9000/" = "//schemas/"
1871"#,
1872 )?;
1873
1874 let f = tmp.path().join("config.json");
1875 fs::write(
1876 &f,
1877 r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1878 )?;
1879
1880 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1881 let c = ValidateArgs {
1882 globs: vec![pattern],
1883 exclude: vec![],
1884 cache_dir: None,
1885 force_schema_fetch: true,
1886 force_validation: true,
1887 no_catalog: true,
1888 config_dir: Some(tmp.path().to_path_buf()),
1889 schema_cache_ttl: None,
1890 };
1891
1892 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1893 assert!(!result.has_errors());
1894 assert_eq!(result.files_checked(), 1);
1895 Ok(())
1896 }
1897
1898 #[tokio::test]
1899 async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1900 let tmp = tempfile::tempdir()?;
1901
1902 let schemas_dir = tmp.path().join("schemas");
1903 fs::create_dir_all(&schemas_dir)?;
1904 fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1905
1906 fs::write(tmp.path().join("lintel.toml"), "")?;
1907
1908 let sub = tmp.path().join("deeply/nested");
1909 fs::create_dir_all(&sub)?;
1910 let f = sub.join("config.json");
1911 fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1912
1913 let pattern = sub.join("*.json").to_string_lossy().to_string();
1914 let c = ValidateArgs {
1915 globs: vec![pattern],
1916 exclude: vec![],
1917 cache_dir: None,
1918 force_schema_fetch: true,
1919 force_validation: true,
1920 no_catalog: true,
1921 config_dir: Some(tmp.path().to_path_buf()),
1922 schema_cache_ttl: None,
1923 };
1924
1925 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1926 assert!(!result.has_errors());
1927 Ok(())
1928 }
1929
    // Schema exercising the `format` keyword: "link" must be a valid
    // "uri-reference" when format validation is enabled.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1938
1939 #[tokio::test]
1940 async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1941 let tmp = tempfile::tempdir()?;
1942 let schema_path = tmp.path().join("schema.json");
1943 fs::write(&schema_path, FORMAT_SCHEMA)?;
1944
1945 let f = tmp.path().join("data.json");
1946 fs::write(
1947 &f,
1948 format!(
1949 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1950 schema_path.to_string_lossy()
1951 ),
1952 )?;
1953
1954 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1955 let c = ValidateArgs {
1956 globs: vec![pattern],
1957 exclude: vec![],
1958 cache_dir: None,
1959 force_schema_fetch: true,
1960 force_validation: true,
1961 no_catalog: true,
1962 config_dir: Some(tmp.path().to_path_buf()),
1963 schema_cache_ttl: None,
1964 };
1965 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
1966 assert!(
1967 result.has_errors(),
1968 "expected format error without override"
1969 );
1970 Ok(())
1971 }
1972
1973 #[tokio::test]
1974 async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1975 let tmp = tempfile::tempdir()?;
1976 let schema_path = tmp.path().join("schema.json");
1977 fs::write(&schema_path, FORMAT_SCHEMA)?;
1978
1979 let f = tmp.path().join("data.json");
1980 fs::write(
1981 &f,
1982 format!(
1983 r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1984 schema_path.to_string_lossy()
1985 ),
1986 )?;
1987
1988 fs::write(
1990 tmp.path().join("lintel.toml"),
1991 r#"
1992[[override]]
1993files = ["**/data.json"]
1994validate_formats = false
1995"#,
1996 )?;
1997
1998 let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1999 let c = ValidateArgs {
2000 globs: vec![pattern],
2001 exclude: vec![],
2002 cache_dir: None,
2003 force_schema_fetch: true,
2004 force_validation: true,
2005 no_catalog: true,
2006 config_dir: Some(tmp.path().to_path_buf()),
2007 schema_cache_ttl: None,
2008 };
2009 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2010 assert!(
2011 !result.has_errors(),
2012 "expected no errors with validate_formats = false override"
2013 );
2014 Ok(())
2015 }
2016
2017 #[tokio::test]
2020 async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
2021 let tmp = tempfile::tempdir()?;
2022 fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
2023
2024 let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
2025 let c = ValidateArgs {
2026 globs: vec![pattern],
2027 exclude: vec![],
2028 cache_dir: None,
2029 force_schema_fetch: true,
2030 force_validation: true,
2031 no_catalog: true,
2032 config_dir: Some(tmp.path().to_path_buf()),
2033 schema_cache_ttl: None,
2034 };
2035 let result = run_with(&c, Some(mock(&[])), |_| {}).await?;
2036 assert!(!result.has_errors());
2037 assert_eq!(result.files_checked(), 0);
2038 Ok(())
2039 }
2040
2041 #[tokio::test]
2042 async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
2043 let tmp = tempfile::tempdir()?;
2044 let cache_tmp = tempfile::tempdir()?;
2045 fs::write(
2047 tmp.path().join("myapp.cfg"),
2048 r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
2049 )?;
2050
2051 let catalog_json = r#"{"version":1,"schemas":[{
2052 "name": "MyApp Config",
2053 "description": "MyApp configuration",
2054 "url": "https://example.com/myapp.schema.json",
2055 "fileMatch": ["*.cfg"]
2056 }]}"#;
2057 let schema =
2058 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2059
2060 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2061 let client = mock(&[
2062 (
2063 "https://www.schemastore.org/api/json/catalog.json",
2064 catalog_json,
2065 ),
2066 ("https://example.com/myapp.schema.json", schema),
2067 ]);
2068 let c = ValidateArgs {
2069 globs: vec![pattern],
2070 exclude: vec![],
2071 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2072 force_schema_fetch: true,
2073 force_validation: true,
2074 no_catalog: false,
2075 config_dir: Some(tmp.path().to_path_buf()),
2076 schema_cache_ttl: None,
2077 };
2078 let result = run_with(&c, Some(client), |_| {}).await?;
2079 assert!(!result.has_errors());
2080 assert_eq!(result.files_checked(), 1);
2081 Ok(())
2082 }
2083
2084 #[tokio::test]
2085 async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
2086 let tmp = tempfile::tempdir()?;
2087 let cache_tmp = tempfile::tempdir()?;
2088 fs::write(
2090 tmp.path().join("myapp.cfg"),
2091 "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
2092 )?;
2093
2094 let catalog_json = r#"{"version":1,"schemas":[{
2095 "name": "MyApp Config",
2096 "description": "MyApp configuration",
2097 "url": "https://example.com/myapp.schema.json",
2098 "fileMatch": ["*.cfg"]
2099 }]}"#;
2100
2101 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2102 let client = mock(&[(
2103 "https://www.schemastore.org/api/json/catalog.json",
2104 catalog_json,
2105 )]);
2106 let c = ValidateArgs {
2107 globs: vec![pattern],
2108 exclude: vec![],
2109 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2110 force_schema_fetch: true,
2111 force_validation: true,
2112 no_catalog: false,
2113 config_dir: Some(tmp.path().to_path_buf()),
2114 schema_cache_ttl: None,
2115 };
2116 let result = run_with(&c, Some(client), |_| {}).await?;
2117 assert!(!result.has_errors());
2118 assert_eq!(result.files_checked(), 0);
2119 Ok(())
2120 }
2121
2122 #[tokio::test]
2123 async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
2124 let tmp = tempfile::tempdir()?;
2125 let cache_tmp = tempfile::tempdir()?;
2126 fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
2128
2129 let catalog_json = r#"{"version":1,"schemas":[{
2130 "name": "MyApp Config",
2131 "description": "MyApp configuration",
2132 "url": "https://example.com/myapp.schema.json",
2133 "fileMatch": ["*.cfg"]
2134 }]}"#;
2135 let schema =
2136 r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
2137
2138 let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
2139 let client = mock(&[
2140 (
2141 "https://www.schemastore.org/api/json/catalog.json",
2142 catalog_json,
2143 ),
2144 ("https://example.com/myapp.schema.json", schema),
2145 ]);
2146 let c = ValidateArgs {
2147 globs: vec![pattern],
2148 exclude: vec![],
2149 cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
2150 force_schema_fetch: true,
2151 force_validation: true,
2152 no_catalog: false,
2153 config_dir: Some(tmp.path().to_path_buf()),
2154 schema_cache_ttl: None,
2155 };
2156 let result = run_with(&c, Some(client), |_| {}).await?;
2157 assert!(result.has_errors());
2158 assert_eq!(result.files_checked(), 1);
2159 Ok(())
2160 }
2161
2162 #[tokio::test]
2165 async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
2166 let tmp = tempfile::tempdir()?;
2167 let schema_path = tmp.path().join("schema.json");
2168 fs::write(&schema_path, SCHEMA)?;
2169
2170 let f = tmp.path().join("valid.json");
2171 fs::write(
2172 &f,
2173 format!(
2174 r#"{{"$schema":"{}","name":"hello"}}"#,
2175 schema_path.to_string_lossy()
2176 ),
2177 )?;
2178
2179 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2180
2181 let c = ValidateArgs {
2183 globs: vec![pattern.clone()],
2184 exclude: vec![],
2185 cache_dir: None,
2186 force_schema_fetch: true,
2187 force_validation: false,
2188 no_catalog: true,
2189 config_dir: None,
2190 schema_cache_ttl: None,
2191 };
2192 let mut first_statuses = Vec::new();
2193 let result = run_with(&c, Some(mock(&[])), |cf| {
2194 first_statuses.push(cf.validation_cache_status);
2195 })
2196 .await?;
2197 assert!(!result.has_errors());
2198 assert!(result.files_checked() > 0);
2199
2200 assert!(
2202 first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
2203 "expected at least one validation cache miss on first run"
2204 );
2205
2206 let mut second_statuses = Vec::new();
2208 let result = run_with(&c, Some(mock(&[])), |cf| {
2209 second_statuses.push(cf.validation_cache_status);
2210 })
2211 .await?;
2212 assert!(!result.has_errors());
2213
2214 assert!(
2216 second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
2217 "expected at least one validation cache hit on second run"
2218 );
2219 Ok(())
2220 }
2221
2222 #[tokio::test]
2226 async fn schema_uri_with_fragment_compiles() -> anyhow::Result<()> {
2227 let tmp = tempfile::tempdir()?;
2228
2229 let schema_body = r#"{
2231 "$schema": "http://json-schema.org/draft-07/schema#",
2232 "type": "object",
2233 "properties": { "name": { "type": "string" } },
2234 "required": ["name"]
2235 }"#;
2236
2237 let schema_url = "http://json-schema.org/draft-07/schema#";
2238
2239 let f = tmp.path().join("data.json");
2240 fs::write(
2241 &f,
2242 format!(r#"{{ "$schema": "{schema_url}", "name": "hello" }}"#),
2243 )?;
2244
2245 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2246 let client = mock(&[(
2247 schema_url,
2249 schema_body,
2250 )]);
2251 let c = ValidateArgs {
2252 globs: vec![pattern],
2253 exclude: vec![],
2254 cache_dir: None,
2255 force_schema_fetch: true,
2256 force_validation: true,
2257 no_catalog: true,
2258 config_dir: None,
2259 schema_cache_ttl: None,
2260 };
2261 let result = run_with(&c, Some(client), |_| {}).await?;
2262 assert!(
2263 !result.has_errors(),
2264 "schema URI with fragment should not cause compilation error"
2265 );
2266 assert_eq!(result.files_checked(), 1);
2267 Ok(())
2268 }
2269
2270 #[tokio::test]
2271 async fn relative_ref_in_local_schema() -> anyhow::Result<()> {
2272 let tmp = tempfile::tempdir()?;
2273
2274 std::fs::write(tmp.path().join("defs.json"), r#"{"type": "string"}"#)?;
2276
2277 let schema_path = tmp.path().join("schema.json");
2279 std::fs::write(
2280 &schema_path,
2281 r#"{
2282 "type": "object",
2283 "properties": {
2284 "name": { "$ref": "./defs.json" }
2285 },
2286 "required": ["name"]
2287 }"#,
2288 )?;
2289
2290 let schema_uri = schema_path.to_string_lossy();
2292 std::fs::write(
2293 tmp.path().join("data.json"),
2294 format!(r#"{{ "$schema": "{schema_uri}", "name": "hello" }}"#),
2295 )?;
2296
2297 std::fs::write(
2299 tmp.path().join("bad.json"),
2300 format!(r#"{{ "$schema": "{schema_uri}", "name": 42 }}"#),
2301 )?;
2302
2303 let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
2304 let args = ValidateArgs {
2305 globs: vec![pattern],
2306 exclude: vec![],
2307 cache_dir: None,
2308 force_schema_fetch: true,
2309 force_validation: true,
2310 no_catalog: true,
2311 config_dir: None,
2312 schema_cache_ttl: None,
2313 };
2314 let result = run_with(&args, Some(mock(&[])), |_| {}).await?;
2315
2316 assert!(result.has_errors());
2318 assert_eq!(result.errors.len(), 1);
2320 Ok(())
2321 }
2322}