1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::io::IsTerminal;
5use std::path::{Path, PathBuf};
6
7use anyhow::{Context, Result};
8use bpaf::{Bpaf, ShellComp};
9use lintel_cli_common::{CLIGlobalOptions, CliCacheOptions};
10
11use lintel_schema_cache::SchemaCache;
12use lintel_validate::parsers;
13use lintel_validate::validate;
14use schema_catalog::{FileFormat, SchemaMatch};
15
// Command-line arguments accepted by the `identify` command.
//
// NOTE(review): plain `//` comments are used on purpose inside this struct.
// bpaf's derive is documented to turn `///` doc comments into `--help` text,
// so switching these to doc comments would change user-visible output.
// Field order is also significant to the generated parser — confirm before
// reordering.
#[derive(Debug, Clone, Bpaf)]
#[bpaf(generate(identify_args_inner))]
pub struct IdentifyArgs {
    // `--explain`: after identifying the schema, `run` additionally calls
    // `run_explain` to render an explanation of it.
    #[bpaf(long("explain"), switch)]
    pub explain: bool,

    // Schema cache options, parsed by the shared `cli_cache_options` parser.
    #[bpaf(external(lintel_cli_common::cli_cache_options))]
    pub cache: CliCacheOptions,

    // `--no-syntax-highlighting`: disables syntax highlighting in the
    // `--explain` output even when colour is enabled.
    #[bpaf(long("no-syntax-highlighting"), switch)]
    pub no_syntax_highlighting: bool,

    // `--no-pager`: print the `--explain` output directly instead of handing
    // it to the pager when stdout is a terminal.
    #[bpaf(long("no-pager"), switch)]
    pub no_pager: bool,

    // Path of the file to identify; shell completion is configured for files.
    #[bpaf(positional("FILE"), complete_shell(ShellComp::File { mask: None }))]
    pub file: String,
}
42
/// Returns the argument parser for the `identify` command.
///
/// Thin public wrapper around `identify_args_inner`, the parser named by the
/// `#[bpaf(generate(identify_args_inner))]` attribute on [`IdentifyArgs`].
pub fn identify_args() -> impl bpaf::Parser<IdentifyArgs> {
    identify_args_inner()
}
47
/// Where the schema URI for a file was discovered.
#[derive(Debug)]
enum SchemaSource {
    /// The URI was extracted from the file's own content by its parser.
    Inline,
    /// The URI came from a glob pattern in the configuration's `schemas` table.
    Config,
    /// The URI came from a schema catalog match.
    Catalog,
}

/// Renders the source as the lowercase label shown on the `source:` line.
impl core::fmt::Display for SchemaSource {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            Self::Inline => "inline",
            Self::Config => "config",
            Self::Catalog => "catalog",
        };
        f.write_str(label)
    }
}
69
/// Outcome of schema resolution, before the URI is post-processed by
/// `finalize_uri`.
struct ResolvedSchema<'a> {
    /// Schema URI exactly as found (inline value, configured URL, or catalog URL).
    uri: String,
    /// Which lookup produced `uri`.
    source: SchemaSource,
    /// Details of the catalog entry; populated only for catalog matches.
    catalog_match: Option<CatalogMatchInfo<'a>>,
    /// The config glob pattern that matched; populated only for config matches.
    config_pattern: Option<&'a str>,
}
79
/// Borrowed details of a catalog match, copied field-for-field from
/// `SchemaMatch` and used when printing the identification report.
struct CatalogMatchInfo<'a> {
    /// Pattern reported on the `matched:` line.
    matched_pattern: &'a str,
    /// Patterns listed on the `globs:` line when there is more than one.
    /// Presumably every glob of the catalog entry — confirm against `SchemaMatch`.
    file_match: &'a [String],
    /// Schema name; preferred as the display name when present.
    name: &'a str,
    /// Optional description, printed on the `description:` line.
    description: Option<&'a str>,
}
87
88impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
89 fn from(m: SchemaMatch<'a>) -> Self {
90 Self {
91 matched_pattern: m.matched_pattern,
92 file_match: m.file_match,
93 name: m.name,
94 description: m.description,
95 }
96 }
97}
98
/// The schema that applies to a file, as returned by the `resolve_schema_for_*`
/// functions.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy and
/// compare results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFileSchema {
    /// Final schema location: an `http(s)` URL, or a local path after rewrites
    /// and base-directory resolution.
    pub schema_uri: String,
    /// Human-readable schema name; falls back to `schema_uri` when no catalog
    /// supplies a name.
    pub display_name: String,
    /// `true` when `schema_uri` starts with `http://` or `https://`.
    pub is_remote: bool,
}
112
113pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
115 let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
116 if let Some(dir) = &cache.cache_dir {
117 builder = builder.cache_dir(PathBuf::from(dir));
118 }
119 if let Some(ttl) = cache.schema_cache_ttl {
120 builder = builder.ttl(ttl);
121 }
122 builder.build()
123}
124
125#[allow(clippy::missing_panics_doc)]
134pub async fn resolve_schema_for_file(
135 file_path: &Path,
136 cache: &CliCacheOptions,
137) -> Result<Option<ResolvedFileSchema>> {
138 let path_str = file_path.display().to_string();
139 let content =
140 std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
141
142 resolve_schema_for_content(&content, file_path, None, cache).await
143}
144
145#[allow(clippy::missing_panics_doc)]
157pub async fn resolve_schema_for_content(
158 content: &str,
159 file_path: &Path,
160 config_search_dir: Option<&Path>,
161 cache: &CliCacheOptions,
162) -> Result<Option<ResolvedFileSchema>> {
163 let path_str = file_path.display().to_string();
164 let file_name = file_path
165 .file_name()
166 .and_then(|n| n.to_str())
167 .unwrap_or(&path_str);
168
169 let retriever = build_retriever(cache);
170
171 let search_dir = config_search_dir
172 .map(Path::to_path_buf)
173 .or_else(|| file_path.parent().map(Path::to_path_buf));
174 let (cfg, config_dir, _config_path) = validate::load_config(search_dir.as_deref());
175
176 let compiled_catalogs =
177 validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
178
179 let detected_format = parsers::detect_format(file_path);
180 let (parser, instance) = parse_file(detected_format, content, &path_str);
181
182 let Some(resolved) = resolve_schema(
183 parser.as_ref(),
184 content,
185 &instance,
186 &path_str,
187 file_name,
188 &cfg,
189 &compiled_catalogs,
190 ) else {
191 return Ok(None);
192 };
193
194 let from_inline = matches!(resolved.source, SchemaSource::Inline);
195 let (schema_uri, is_remote) = finalize_uri(
196 &resolved.uri,
197 &cfg.rewrite,
198 &config_dir,
199 file_path,
200 from_inline,
201 );
202
203 let display_name = resolved
204 .catalog_match
205 .as_ref()
206 .map(|m| m.name.to_string())
207 .or_else(|| {
208 compiled_catalogs
209 .iter()
210 .find_map(|cat| cat.schema_name(&schema_uri))
211 .map(str::to_string)
212 })
213 .unwrap_or_else(|| schema_uri.clone());
214
215 Ok(Some(ResolvedFileSchema {
216 schema_uri,
217 display_name,
218 is_remote,
219 }))
220}
221
222#[allow(clippy::missing_panics_doc)]
233pub async fn resolve_schema_for_path(
234 file_path: &Path,
235 cache: &CliCacheOptions,
236) -> Result<Option<ResolvedFileSchema>> {
237 let path_str = file_path.display().to_string();
238 let file_name = file_path
239 .file_name()
240 .and_then(|n| n.to_str())
241 .unwrap_or(&path_str);
242
243 let retriever = build_retriever(cache);
244
245 let config_search_dir = file_path.parent().map(Path::to_path_buf);
246 let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
247
248 let compiled_catalogs =
249 validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
250
251 let Some(resolved) = resolve_schema_path_only(&path_str, file_name, &cfg, &compiled_catalogs)
252 else {
253 return Ok(None);
254 };
255
256 let from_inline = matches!(resolved.source, SchemaSource::Inline);
257 let (schema_uri, is_remote) = finalize_uri(
258 &resolved.uri,
259 &cfg.rewrite,
260 &config_dir,
261 file_path,
262 from_inline,
263 );
264
265 let display_name = resolved
266 .catalog_match
267 .as_ref()
268 .map(|m| m.name.to_string())
269 .or_else(|| {
270 compiled_catalogs
271 .iter()
272 .find_map(|cat| cat.schema_name(&schema_uri))
273 .map(str::to_string)
274 })
275 .unwrap_or_else(|| schema_uri.clone());
276
277 Ok(Some(ResolvedFileSchema {
278 schema_uri,
279 display_name,
280 is_remote,
281 }))
282}
283
284#[allow(clippy::missing_panics_doc, clippy::missing_errors_doc)]
289pub async fn run(args: IdentifyArgs, global: &CLIGlobalOptions) -> Result<bool> {
290 let file_path = Path::new(&args.file);
291 if !file_path.exists() {
292 anyhow::bail!("file not found: {}", args.file);
293 }
294
295 let content = std::fs::read_to_string(file_path)
296 .with_context(|| format!("failed to read {}", args.file))?;
297
298 let path_str = file_path.display().to_string();
299 let file_name = file_path
300 .file_name()
301 .and_then(|n| n.to_str())
302 .unwrap_or(&path_str);
303
304 let retriever = build_retriever(&args.cache);
305
306 let config_search_dir = file_path.parent().map(Path::to_path_buf);
307 let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
308
309 let compiled_catalogs =
310 validate::fetch_compiled_catalogs(&retriever, &cfg, args.cache.no_catalog).await;
311
312 let detected_format = parsers::detect_format(file_path);
313 let (parser, instance) = parse_file(detected_format, &content, &path_str);
314
315 let Some(resolved) = resolve_schema(
316 parser.as_ref(),
317 &content,
318 &instance,
319 &path_str,
320 file_name,
321 &cfg,
322 &compiled_catalogs,
323 ) else {
324 eprintln!("{path_str}");
325 eprintln!(" no schema found");
326 return Ok(false);
327 };
328
329 let from_inline = matches!(resolved.source, SchemaSource::Inline);
330 let (schema_uri, is_remote) = finalize_uri(
331 &resolved.uri,
332 &cfg.rewrite,
333 &config_dir,
334 file_path,
335 from_inline,
336 );
337
338 let display_name = resolved
339 .catalog_match
340 .as_ref()
341 .map(|m| m.name)
342 .or_else(|| {
343 compiled_catalogs
344 .iter()
345 .find_map(|cat| cat.schema_name(&schema_uri))
346 })
347 .unwrap_or(&schema_uri);
348
349 print_identification(&path_str, &schema_uri, display_name, &resolved);
350
351 if args.explain {
352 run_explain(
353 &args,
354 global,
355 &schema_uri,
356 display_name,
357 is_remote,
358 &retriever,
359 )
360 .await?;
361 }
362
363 Ok(false)
364}
365
366#[allow(clippy::too_many_arguments)]
368fn resolve_schema<'a>(
369 parser: &dyn parsers::Parser,
370 content: &str,
371 instance: &serde_json::Value,
372 path_str: &str,
373 file_name: &'a str,
374 cfg: &'a lintel_config::Config,
375 catalogs: &'a [schema_catalog::CompiledCatalog],
376) -> Option<ResolvedSchema<'a>> {
377 if let Some(uri) = parser.extract_schema_uri(content, instance) {
378 return Some(ResolvedSchema {
379 uri,
380 source: SchemaSource::Inline,
381 catalog_match: None,
382 config_pattern: None,
383 });
384 }
385
386 resolve_schema_path_only(path_str, file_name, cfg, catalogs)
387}
388
389fn resolve_schema_path_only<'a>(
391 path_str: &str,
392 file_name: &'a str,
393 cfg: &'a lintel_config::Config,
394 catalogs: &'a [schema_catalog::CompiledCatalog],
395) -> Option<ResolvedSchema<'a>> {
396 if let Some((pattern, url)) = cfg
397 .schemas
398 .iter()
399 .find(|(pattern, _)| {
400 let p = path_str.strip_prefix("./").unwrap_or(path_str);
401 glob_match::glob_match(pattern, p) || glob_match::glob_match(pattern, file_name)
402 })
403 .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
404 {
405 return Some(ResolvedSchema {
406 uri: url.to_string(),
407 source: SchemaSource::Config,
408 catalog_match: None,
409 config_pattern: Some(pattern),
410 });
411 }
412
413 catalogs
414 .iter()
415 .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
416 .map(|schema_match| ResolvedSchema {
417 uri: schema_match.url.to_string(),
418 source: SchemaSource::Catalog,
419 catalog_match: Some(schema_match.into()),
420 config_pattern: None,
421 })
422}
423
424#[allow(clippy::too_many_arguments)]
430fn finalize_uri(
431 raw_uri: &str,
432 rewrites: &HashMap<String, String>,
433 config_dir: &Path,
434 file_path: &Path,
435 from_inline: bool,
436) -> (String, bool) {
437 let schema_uri = lintel_config::apply_rewrites(raw_uri, rewrites);
438 let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
439
440 let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
441 let schema_uri = if is_remote {
442 schema_uri
443 } else {
444 let base_dir = if from_inline {
445 file_path.parent()
446 } else {
447 Some(config_dir)
448 };
449 base_dir
450 .map(|dir| dir.join(&schema_uri).to_string_lossy().to_string())
451 .unwrap_or(schema_uri)
452 };
453
454 (schema_uri, is_remote)
455}
456
457fn print_identification(
459 path_str: &str,
460 schema_uri: &str,
461 display_name: &str,
462 resolved: &ResolvedSchema<'_>,
463) {
464 println!("{path_str}");
465 if display_name == schema_uri {
466 println!(" schema: {schema_uri}");
467 } else {
468 println!(" schema: {display_name} ({schema_uri})");
469 }
470 println!(" source: {}", resolved.source);
471
472 match &resolved.source {
473 SchemaSource::Inline => {}
474 SchemaSource::Config => {
475 if let Some(pattern) = resolved.config_pattern {
476 println!(" matched: {pattern}");
477 }
478 }
479 SchemaSource::Catalog => {
480 if let Some(ref m) = resolved.catalog_match {
481 println!(" matched: {}", m.matched_pattern);
482 if m.file_match.len() > 1 {
483 let globs = m
484 .file_match
485 .iter()
486 .map(String::as_str)
487 .collect::<Vec<_>>()
488 .join(", ");
489 println!(" globs: {globs}");
490 }
491 if let Some(desc) = m.description {
492 println!(" description: {desc}");
493 }
494 }
495 }
496 }
497}
498
499#[allow(clippy::too_many_arguments)]
501async fn run_explain(
502 args: &IdentifyArgs,
503 global: &CLIGlobalOptions,
504 schema_uri: &str,
505 display_name: &str,
506 is_remote: bool,
507 retriever: &SchemaCache,
508) -> Result<()> {
509 let schema_value = if is_remote {
510 match retriever.fetch(schema_uri).await {
511 Ok((val, _)) => val,
512 Err(e) => {
513 eprintln!(" error fetching schema: {e}");
514 return Ok(());
515 }
516 }
517 } else {
518 let schema_content = std::fs::read_to_string(schema_uri)
519 .with_context(|| format!("failed to read schema: {schema_uri}"))?;
520 serde_json::from_str(&schema_content)
521 .with_context(|| format!("failed to parse schema: {schema_uri}"))?
522 };
523
524 let is_tty = std::io::stdout().is_terminal();
525 let use_color = match global.colors {
526 Some(lintel_cli_common::ColorsArg::Force) => true,
527 Some(lintel_cli_common::ColorsArg::Off) => false,
528 None => is_tty,
529 };
530 let opts = jsonschema_explain::ExplainOptions {
531 color: use_color,
532 syntax_highlight: use_color && !args.no_syntax_highlighting,
533 width: terminal_size::terminal_size()
534 .map(|(w, _)| w.0 as usize)
535 .or_else(|| std::env::var("COLUMNS").ok()?.parse().ok())
536 .unwrap_or(80),
537 validation_errors: vec![],
538 };
539 let output = jsonschema_explain::explain(&schema_value, display_name, &opts);
540
541 if is_tty && !args.no_pager {
542 lintel_cli_common::pipe_to_pager(&format!("\n{output}"));
543 } else {
544 println!();
545 print!("{output}");
546 }
547 Ok(())
548}
549
550fn parse_file(
554 detected_format: Option<FileFormat>,
555 content: &str,
556 path_str: &str,
557) -> (Box<dyn parsers::Parser>, serde_json::Value) {
558 if let Some(fmt) = detected_format {
559 let parser = parsers::parser_for(fmt);
560 if let Ok(val) = parser.parse(content, path_str) {
561 return (parser, val);
562 }
563 if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
565 return (parsers::parser_for(fmt), val);
566 }
567 eprintln!("{path_str}");
568 eprintln!(" no schema found (file could not be parsed)");
569 std::process::exit(0);
570 }
571
572 if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
573 return (parsers::parser_for(fmt), val);
574 }
575
576 eprintln!("{path_str}");
577 eprintln!(" no schema found (unrecognized format)");
578 std::process::exit(0);
579}
580
#[cfg(test)]
mod tests {
    use super::*;

    use bpaf::Parser;
    use lintel_cli_common::cli_global_options;

    /// Option parser combining the global options with the identify arguments.
    fn test_cli() -> bpaf::OptionParser<(CLIGlobalOptions, IdentifyArgs)> {
        bpaf::construct!(cli_global_options(), identify_args())
            .to_options()
            .descr("test identify args")
    }

    /// Parses `argv` and returns only the identify arguments, converting a
    /// parse failure into an `anyhow` error via its debug representation.
    fn parse<const N: usize>(argv: &'static [&'static str; N]) -> anyhow::Result<IdentifyArgs> {
        test_cli()
            .run_inner(argv)
            .map(|(_, args)| args)
            .map_err(|e| anyhow::anyhow!("{e:?}"))
    }

    #[test]
    fn cli_parses_identify_basic() -> anyhow::Result<()> {
        let parsed = parse(&["file.json"])?;
        assert_eq!(parsed.file, "file.json");
        assert!(!parsed.explain);
        assert!(!parsed.cache.no_catalog);
        assert!(!parsed.cache.force_schema_fetch);
        assert!(parsed.cache.cache_dir.is_none());
        assert!(parsed.cache.schema_cache_ttl.is_none());
        Ok(())
    }

    #[test]
    fn cli_parses_identify_explain() -> anyhow::Result<()> {
        let parsed = parse(&["file.json", "--explain"])?;
        assert_eq!(parsed.file, "file.json");
        assert!(parsed.explain);
        Ok(())
    }

    #[test]
    fn cli_parses_identify_no_catalog() -> anyhow::Result<()> {
        let parsed = parse(&["--no-catalog", "file.json"])?;
        assert_eq!(parsed.file, "file.json");
        assert!(parsed.cache.no_catalog);
        Ok(())
    }

    #[test]
    fn cli_parses_identify_all_options() -> anyhow::Result<()> {
        let parsed = parse(&[
            "--explain",
            "--no-catalog",
            "--force-schema-fetch",
            "--cache-dir",
            "/tmp/cache",
            "--schema-cache-ttl",
            "30m",
            "tsconfig.json",
        ])?;
        assert_eq!(parsed.file, "tsconfig.json");
        assert!(parsed.explain);
        assert!(parsed.cache.no_catalog);
        assert!(parsed.cache.force_schema_fetch);
        assert_eq!(parsed.cache.cache_dir.as_deref(), Some("/tmp/cache"));
        assert_eq!(
            parsed.cache.schema_cache_ttl,
            Some(core::time::Duration::from_secs(30 * 60))
        );
        Ok(())
    }
}