Skip to main content

lintel_identify/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::io::IsTerminal;
5use std::path::{Path, PathBuf};
6
7use anyhow::{Context, Result};
8use bpaf::Bpaf;
9use lintel_cli_common::{CLIGlobalOptions, CliCacheOptions};
10
11use lintel_schema_cache::SchemaCache;
12use lintel_validate::parsers;
13use lintel_validate::validate;
14use schemastore::SchemaMatch;
15
16// ---------------------------------------------------------------------------
17// CLI args
18// ---------------------------------------------------------------------------
19
// Command-line arguments consumed by `run` in this file.
//
// NOTE(review): the `///` comments on the fields are presumably surfaced by the
// bpaf derive as user-facing help text, so they are intentionally left verbatim.
#[derive(Debug, Clone, Bpaf)]
// Names the derive-generated parser function; `identify_args` below wraps it.
#[bpaf(generate(identify_args_inner))]
pub struct IdentifyArgs {
    /// Show detailed schema documentation
    // When set, `run` additionally calls `run_explain` after printing the summary.
    #[bpaf(long("explain"), switch)]
    pub explain: bool,

    // Shared cache options; passed to `build_retriever`, and `no_catalog` is
    // forwarded to the catalog fetch.
    #[bpaf(external(lintel_cli_common::cli_cache_options))]
    pub cache: CliCacheOptions,

    /// Disable syntax highlighting in code blocks
    // Only consulted by `run_explain`, and only when color output is enabled.
    #[bpaf(long("no-syntax-highlighting"), switch)]
    pub no_syntax_highlighting: bool,

    /// Print output directly instead of piping through a pager
    // Only consulted by `run_explain`; the pager is used only when stdout is a terminal.
    #[bpaf(long("no-pager"), switch)]
    pub no_pager: bool,

    /// File to identify
    // Interpreted as a filesystem path by `run` (`Path::new(&args.file)`).
    #[bpaf(positional("FILE"))]
    pub file: String,
}
42
43/// Construct the bpaf parser for `IdentifyArgs`.
44pub fn identify_args() -> impl bpaf::Parser<IdentifyArgs> {
45    identify_args_inner()
46}
47
48// ---------------------------------------------------------------------------
49// Internal types
50// ---------------------------------------------------------------------------
51
/// Which resolution step produced the schema URI for a file.
#[derive(Debug)]
enum SchemaSource {
    /// The URI was extracted from the file content itself.
    Inline,
    /// The URI came from a pattern in the loaded configuration's schema mappings.
    Config,
    /// The URI came from a schema catalog match.
    Catalog,
}

impl core::fmt::Display for SchemaSource {
    /// Writes the lowercase label shown on the `source:` line of the summary.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = match self {
            Self::Inline => "inline",
            Self::Config => "config",
            Self::Catalog => "catalog",
        };
        f.write_str(label)
    }
}
69
/// Match details captured during schema resolution.
///
/// Produced by `resolve_schema` / `resolve_schema_path_only`. The `'a` lifetime
/// ties the borrowed fields to the configuration and compiled catalogs passed
/// to those functions.
struct ResolvedSchema<'a> {
    /// The schema URI as found, before `finalize_uri` applies rewrites and
    /// path resolution.
    uri: String,
    /// Which resolution step produced `uri`.
    source: SchemaSource,
    /// Present only for catalog matches.
    catalog_match: Option<CatalogMatchInfo<'a>>,
    /// Present only for config matches.
    config_pattern: Option<&'a str>,
}
79
/// Details from a catalog match, borrowed from the `CompiledCatalog`.
///
/// Built from a `SchemaMatch` via the `From` impl below; the match's `url`
/// is not copied here (it is stored in `ResolvedSchema::uri` instead).
struct CatalogMatchInfo<'a> {
    /// Printed on the `matched:` line of the identification summary.
    matched_pattern: &'a str,
    /// Printed on the `globs:` line, but only when it has more than one entry.
    file_match: &'a [String],
    /// Used as the human-readable display name of the schema.
    name: &'a str,
    /// Printed on the `description:` line when present.
    description: Option<&'a str>,
}
87
88impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
89    fn from(m: SchemaMatch<'a>) -> Self {
90        Self {
91            matched_pattern: m.matched_pattern,
92            file_match: m.file_match,
93            name: m.name,
94            description: m.description,
95        }
96    }
97}
98
99// ---------------------------------------------------------------------------
100// Resolved file schema — reusable by `lintel explain`
101// ---------------------------------------------------------------------------
102
/// Result of resolving a schema for a given file path.
///
/// All fields are owned, so the value can be cloned, compared and logged
/// independently of the configuration and catalogs used to produce it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFileSchema {
    /// The final schema URI (after rewrites and path resolution).
    pub schema_uri: String,
    /// A human-readable name (from catalog or URI).
    pub display_name: String,
    /// Whether the schema is a remote URL (`http://` or `https://`).
    pub is_remote: bool,
}
112
113/// Build a [`SchemaCache`] from [`CliCacheOptions`].
114pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
115    let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
116    if let Some(dir) = &cache.cache_dir {
117        builder = builder.cache_dir(PathBuf::from(dir));
118    }
119    if let Some(ttl) = cache.schema_cache_ttl {
120        builder = builder.ttl(ttl);
121    }
122    builder.build()
123}
124
125/// Resolve the schema URI for a file path using the same priority as validation:
126/// 1. Inline `$schema` / YAML modeline
127/// 2. Custom schema mappings from `lintel.toml [schemas]`
128/// 3. Catalog matching
129///
130/// # Errors
131///
132/// Returns an error if the file cannot be read.
133#[allow(clippy::missing_panics_doc)]
134pub async fn resolve_schema_for_file(
135    file_path: &Path,
136    cache: &CliCacheOptions,
137) -> Result<Option<ResolvedFileSchema>> {
138    let path_str = file_path.display().to_string();
139    let content =
140        std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
141
142    resolve_schema_for_content(&content, file_path, None, cache).await
143}
144
145/// Resolve a schema from in-memory content and a virtual file path.
146///
147/// Uses `file_path` for extension detection and catalog matching, and
148/// `config_search_dir` for locating `lintel.toml` (falls back to
149/// `file_path.parent()` when `None`).
150///
151/// Resolution order: inline `$schema` > config > catalogs.
152///
153/// # Errors
154///
155/// Returns an error if catalogs cannot be fetched.
156#[allow(clippy::missing_panics_doc)]
157pub async fn resolve_schema_for_content(
158    content: &str,
159    file_path: &Path,
160    config_search_dir: Option<&Path>,
161    cache: &CliCacheOptions,
162) -> Result<Option<ResolvedFileSchema>> {
163    let path_str = file_path.display().to_string();
164    let file_name = file_path
165        .file_name()
166        .and_then(|n| n.to_str())
167        .unwrap_or(&path_str);
168
169    let retriever = build_retriever(cache);
170
171    let search_dir = config_search_dir
172        .map(Path::to_path_buf)
173        .or_else(|| file_path.parent().map(Path::to_path_buf));
174    let (cfg, config_dir, _config_path) = validate::load_config(search_dir.as_deref());
175
176    let compiled_catalogs =
177        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
178
179    let detected_format = parsers::detect_format(file_path);
180    let (parser, instance) = parse_file(detected_format, content, &path_str);
181
182    let Some(resolved) = resolve_schema(
183        parser.as_ref(),
184        content,
185        &instance,
186        &path_str,
187        file_name,
188        &cfg,
189        &compiled_catalogs,
190    ) else {
191        return Ok(None);
192    };
193
194    let (schema_uri, is_remote) = finalize_uri(&resolved.uri, &cfg.rewrite, &config_dir, file_path);
195
196    let display_name = resolved
197        .catalog_match
198        .as_ref()
199        .map(|m| m.name.to_string())
200        .or_else(|| {
201            compiled_catalogs
202                .iter()
203                .find_map(|cat| cat.schema_name(&schema_uri))
204                .map(str::to_string)
205        })
206        .unwrap_or_else(|| schema_uri.clone());
207
208    Ok(Some(ResolvedFileSchema {
209        schema_uri,
210        display_name,
211        is_remote,
212    }))
213}
214
215/// Resolve the schema URI for a file path using only path-based matching:
216/// 1. Custom schema mappings from `lintel.toml [schemas]`
217/// 2. Catalog matching
218///
219/// Unlike [`resolve_schema_for_file`], this does NOT read the file or check
220/// for inline `$schema` directives. The file does not need to exist.
221///
222/// # Errors
223///
224/// Returns an error if the catalogs cannot be fetched.
225#[allow(clippy::missing_panics_doc)]
226pub async fn resolve_schema_for_path(
227    file_path: &Path,
228    cache: &CliCacheOptions,
229) -> Result<Option<ResolvedFileSchema>> {
230    let path_str = file_path.display().to_string();
231    let file_name = file_path
232        .file_name()
233        .and_then(|n| n.to_str())
234        .unwrap_or(&path_str);
235
236    let retriever = build_retriever(cache);
237
238    let config_search_dir = file_path.parent().map(Path::to_path_buf);
239    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
240
241    let compiled_catalogs =
242        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
243
244    let Some(resolved) = resolve_schema_path_only(&path_str, file_name, &cfg, &compiled_catalogs)
245    else {
246        return Ok(None);
247    };
248
249    let (schema_uri, is_remote) = finalize_uri(&resolved.uri, &cfg.rewrite, &config_dir, file_path);
250
251    let display_name = resolved
252        .catalog_match
253        .as_ref()
254        .map(|m| m.name.to_string())
255        .or_else(|| {
256            compiled_catalogs
257                .iter()
258                .find_map(|cat| cat.schema_name(&schema_uri))
259                .map(str::to_string)
260        })
261        .unwrap_or_else(|| schema_uri.clone());
262
263    Ok(Some(ResolvedFileSchema {
264        schema_uri,
265        display_name,
266        is_remote,
267    }))
268}
269
270// ---------------------------------------------------------------------------
271// Entry point
272// ---------------------------------------------------------------------------
273
274#[allow(clippy::missing_panics_doc, clippy::missing_errors_doc)]
275pub async fn run(args: IdentifyArgs, global: &CLIGlobalOptions) -> Result<bool> {
276    let file_path = Path::new(&args.file);
277    if !file_path.exists() {
278        anyhow::bail!("file not found: {}", args.file);
279    }
280
281    let content = std::fs::read_to_string(file_path)
282        .with_context(|| format!("failed to read {}", args.file))?;
283
284    let path_str = file_path.display().to_string();
285    let file_name = file_path
286        .file_name()
287        .and_then(|n| n.to_str())
288        .unwrap_or(&path_str);
289
290    let retriever = build_retriever(&args.cache);
291
292    let config_search_dir = file_path.parent().map(Path::to_path_buf);
293    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
294
295    let compiled_catalogs =
296        validate::fetch_compiled_catalogs(&retriever, &cfg, args.cache.no_catalog).await;
297
298    let detected_format = parsers::detect_format(file_path);
299    let (parser, instance) = parse_file(detected_format, &content, &path_str);
300
301    let Some(resolved) = resolve_schema(
302        parser.as_ref(),
303        &content,
304        &instance,
305        &path_str,
306        file_name,
307        &cfg,
308        &compiled_catalogs,
309    ) else {
310        eprintln!("{path_str}");
311        eprintln!("  no schema found");
312        return Ok(false);
313    };
314
315    let (schema_uri, is_remote) = finalize_uri(&resolved.uri, &cfg.rewrite, &config_dir, file_path);
316
317    let display_name = resolved
318        .catalog_match
319        .as_ref()
320        .map(|m| m.name)
321        .or_else(|| {
322            compiled_catalogs
323                .iter()
324                .find_map(|cat| cat.schema_name(&schema_uri))
325        })
326        .unwrap_or(&schema_uri);
327
328    print_identification(&path_str, &schema_uri, display_name, &resolved);
329
330    if args.explain {
331        run_explain(
332            &args,
333            global,
334            &schema_uri,
335            display_name,
336            is_remote,
337            &retriever,
338        )
339        .await?;
340    }
341
342    Ok(false)
343}
344
345/// Try each resolution source in priority order, returning `None` if no schema is found.
346#[allow(clippy::too_many_arguments)]
347fn resolve_schema<'a>(
348    parser: &dyn parsers::Parser,
349    content: &str,
350    instance: &serde_json::Value,
351    path_str: &str,
352    file_name: &'a str,
353    cfg: &'a lintel_config::Config,
354    catalogs: &'a [schemastore::CompiledCatalog],
355) -> Option<ResolvedSchema<'a>> {
356    if let Some(uri) = parser.extract_schema_uri(content, instance) {
357        return Some(ResolvedSchema {
358            uri,
359            source: SchemaSource::Inline,
360            catalog_match: None,
361            config_pattern: None,
362        });
363    }
364
365    resolve_schema_path_only(path_str, file_name, cfg, catalogs)
366}
367
368/// Try config mappings and catalog matching only (no inline `$schema`).
369fn resolve_schema_path_only<'a>(
370    path_str: &str,
371    file_name: &'a str,
372    cfg: &'a lintel_config::Config,
373    catalogs: &'a [schemastore::CompiledCatalog],
374) -> Option<ResolvedSchema<'a>> {
375    if let Some((pattern, url)) = cfg
376        .schemas
377        .iter()
378        .find(|(pattern, _)| {
379            let p = path_str.strip_prefix("./").unwrap_or(path_str);
380            glob_match::glob_match(pattern, p) || glob_match::glob_match(pattern, file_name)
381        })
382        .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
383    {
384        return Some(ResolvedSchema {
385            uri: url.to_string(),
386            source: SchemaSource::Config,
387            catalog_match: None,
388            config_pattern: Some(pattern),
389        });
390    }
391
392    catalogs
393        .iter()
394        .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
395        .map(|schema_match| ResolvedSchema {
396            uri: schema_match.url.to_string(),
397            source: SchemaSource::Catalog,
398            catalog_match: Some(schema_match.into()),
399            config_pattern: None,
400        })
401}
402
403/// Apply rewrites, resolve relative paths, and determine whether the URI is remote.
404fn finalize_uri(
405    raw_uri: &str,
406    rewrites: &HashMap<String, String>,
407    config_dir: &Path,
408    file_path: &Path,
409) -> (String, bool) {
410    let schema_uri = lintel_config::apply_rewrites(raw_uri, rewrites);
411    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
412
413    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
414    let schema_uri = if is_remote {
415        schema_uri
416    } else {
417        file_path
418            .parent()
419            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
420            .unwrap_or(schema_uri)
421    };
422
423    (schema_uri, is_remote)
424}
425
426/// Print the identification summary to stdout.
427fn print_identification(
428    path_str: &str,
429    schema_uri: &str,
430    display_name: &str,
431    resolved: &ResolvedSchema<'_>,
432) {
433    println!("{path_str}");
434    if display_name == schema_uri {
435        println!("  schema: {schema_uri}");
436    } else {
437        println!("  schema: {display_name} ({schema_uri})");
438    }
439    println!("  source: {}", resolved.source);
440
441    match &resolved.source {
442        SchemaSource::Inline => {}
443        SchemaSource::Config => {
444            if let Some(pattern) = resolved.config_pattern {
445                println!("  matched: {pattern}");
446            }
447        }
448        SchemaSource::Catalog => {
449            if let Some(ref m) = resolved.catalog_match {
450                println!("  matched: {}", m.matched_pattern);
451                if m.file_match.len() > 1 {
452                    let globs = m
453                        .file_match
454                        .iter()
455                        .map(String::as_str)
456                        .collect::<Vec<_>>()
457                        .join(", ");
458                    println!("  globs: {globs}");
459                }
460                if let Some(desc) = m.description {
461                    println!("  description: {desc}");
462                }
463            }
464        }
465    }
466}
467
468/// Fetch the schema and render its documentation.
469#[allow(clippy::too_many_arguments)]
470async fn run_explain(
471    args: &IdentifyArgs,
472    global: &CLIGlobalOptions,
473    schema_uri: &str,
474    display_name: &str,
475    is_remote: bool,
476    retriever: &SchemaCache,
477) -> Result<()> {
478    let schema_value = if is_remote {
479        match retriever.fetch(schema_uri).await {
480            Ok((val, _)) => val,
481            Err(e) => {
482                eprintln!("  error fetching schema: {e}");
483                return Ok(());
484            }
485        }
486    } else {
487        let schema_content = std::fs::read_to_string(schema_uri)
488            .with_context(|| format!("failed to read schema: {schema_uri}"))?;
489        serde_json::from_str(&schema_content)
490            .with_context(|| format!("failed to parse schema: {schema_uri}"))?
491    };
492
493    let is_tty = std::io::stdout().is_terminal();
494    let use_color = match global.colors {
495        Some(lintel_cli_common::ColorsArg::Force) => true,
496        Some(lintel_cli_common::ColorsArg::Off) => false,
497        None => is_tty,
498    };
499    let opts = jsonschema_explain::ExplainOptions {
500        color: use_color,
501        syntax_highlight: use_color && !args.no_syntax_highlighting,
502        width: terminal_size::terminal_size()
503            .map(|(w, _)| w.0 as usize)
504            .or_else(|| std::env::var("COLUMNS").ok()?.parse().ok())
505            .unwrap_or(80),
506        validation_errors: vec![],
507    };
508    let output = jsonschema_explain::explain(&schema_value, display_name, &opts);
509
510    if is_tty && !args.no_pager {
511        lintel_cli_common::pipe_to_pager(&format!("\n{output}"));
512    } else {
513        println!();
514        print!("{output}");
515    }
516    Ok(())
517}
518
519/// Parse the file content, trying the detected format first, then all parsers as fallback.
520///
521/// Exits the process when the file cannot be parsed.
522fn parse_file(
523    detected_format: Option<parsers::FileFormat>,
524    content: &str,
525    path_str: &str,
526) -> (Box<dyn parsers::Parser>, serde_json::Value) {
527    if let Some(fmt) = detected_format {
528        let parser = parsers::parser_for(fmt);
529        if let Ok(val) = parser.parse(content, path_str) {
530            return (parser, val);
531        }
532        // Try all parsers as fallback
533        if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
534            return (parsers::parser_for(fmt), val);
535        }
536        eprintln!("{path_str}");
537        eprintln!("  no schema found (file could not be parsed)");
538        std::process::exit(0);
539    }
540
541    if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
542        return (parsers::parser_for(fmt), val);
543    }
544
545    eprintln!("{path_str}");
546    eprintln!("  no schema found (unrecognized format)");
547    std::process::exit(0);
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    use bpaf::Parser;
    use lintel_cli_common::cli_global_options;

    /// Build an option parser shaped like the binary's: global options first,
    /// then the identify arguments.
    fn test_cli() -> bpaf::OptionParser<(CLIGlobalOptions, IdentifyArgs)> {
        let combined = bpaf::construct!(cli_global_options(), identify_args());
        combined.to_options().descr("test identify args")
    }

    /// Turn a parse failure into an `anyhow` error carrying its debug rendering.
    fn debug_error(failure: impl core::fmt::Debug) -> anyhow::Error {
        anyhow::anyhow!("{failure:?}")
    }

    /// A lone positional leaves every flag and cache option at its default.
    #[test]
    fn cli_parses_identify_basic() -> anyhow::Result<()> {
        let (_, parsed) = test_cli().run_inner(&["file.json"]).map_err(debug_error)?;
        assert_eq!(parsed.file, "file.json");
        assert!(!parsed.explain);
        assert!(!parsed.cache.no_catalog);
        assert!(!parsed.cache.force_schema_fetch);
        assert!(parsed.cache.cache_dir.is_none());
        assert!(parsed.cache.schema_cache_ttl.is_none());
        Ok(())
    }

    /// `--explain` may follow the positional argument.
    #[test]
    fn cli_parses_identify_explain() -> anyhow::Result<()> {
        let (_, parsed) = test_cli()
            .run_inner(&["file.json", "--explain"])
            .map_err(debug_error)?;
        assert_eq!(parsed.file, "file.json");
        assert!(parsed.explain);
        Ok(())
    }

    /// `--no-catalog` is routed into the shared cache options.
    #[test]
    fn cli_parses_identify_no_catalog() -> anyhow::Result<()> {
        let (_, parsed) = test_cli()
            .run_inner(&["--no-catalog", "file.json"])
            .map_err(debug_error)?;
        assert_eq!(parsed.file, "file.json");
        assert!(parsed.cache.no_catalog);
        Ok(())
    }

    /// All supported options combined, including a human-readable TTL.
    #[test]
    fn cli_parses_identify_all_options() -> anyhow::Result<()> {
        let (_, parsed) = test_cli()
            .run_inner(&[
                "--explain",
                "--no-catalog",
                "--force-schema-fetch",
                "--cache-dir",
                "/tmp/cache",
                "--schema-cache-ttl",
                "30m",
                "tsconfig.json",
            ])
            .map_err(debug_error)?;
        assert_eq!(parsed.file, "tsconfig.json");
        assert!(parsed.explain);
        assert!(parsed.cache.no_catalog);
        assert!(parsed.cache.force_schema_fetch);
        assert_eq!(parsed.cache.cache_dir.as_deref(), Some("/tmp/cache"));
        let thirty_minutes = core::time::Duration::from_secs(30 * 60);
        assert_eq!(parsed.cache.schema_cache_ttl, Some(thirty_minutes));
        Ok(())
    }
}