Skip to main content

lintel_identify/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::io::IsTerminal;
5use std::path::{Path, PathBuf};
6
7use anyhow::{Context, Result};
8use bpaf::{Bpaf, ShellComp};
9use lintel_cli_common::{CLIGlobalOptions, CliCacheOptions};
10
11use lintel_schema_cache::SchemaCache;
12use lintel_validate::parsers;
13use lintel_validate::validate;
14use schema_catalog::{FileFormat, SchemaMatch};
15
16// ---------------------------------------------------------------------------
17// CLI args
18// ---------------------------------------------------------------------------
19
20#[derive(Debug, Clone, Bpaf)]
21#[bpaf(generate(identify_args_inner))]
22pub struct IdentifyArgs {
23    /// Show detailed schema documentation
24    #[bpaf(long("explain"), switch)]
25    pub explain: bool,
26
27    #[bpaf(external(lintel_cli_common::cli_cache_options))]
28    pub cache: CliCacheOptions,
29
30    /// Disable syntax highlighting in code blocks
31    #[bpaf(long("no-syntax-highlighting"), switch)]
32    pub no_syntax_highlighting: bool,
33
34    /// Print output directly instead of piping through a pager
35    #[bpaf(long("no-pager"), switch)]
36    pub no_pager: bool,
37
38    /// File to identify
39    #[bpaf(positional("FILE"), complete_shell(ShellComp::File { mask: None }))]
40    pub file: String,
41}
42
43/// Construct the bpaf parser for `IdentifyArgs`.
44pub fn identify_args() -> impl bpaf::Parser<IdentifyArgs> {
45    identify_args_inner()
46}
47
48// ---------------------------------------------------------------------------
49// Internal types
50// ---------------------------------------------------------------------------
51
/// Which resolution step produced the schema URI for a file.
#[derive(Debug)]
enum SchemaSource {
    /// Taken from the file content itself.
    Inline,
    /// Taken from a schema mapping in the configuration.
    Config,
    /// Taken from a catalog entry.
    Catalog,
}

impl core::fmt::Display for SchemaSource {
    /// Writes the lowercase label of the variant (`inline`, `config`, `catalog`).
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = match self {
            Self::Inline => "inline",
            Self::Config => "config",
            Self::Catalog => "catalog",
        };
        f.write_str(label)
    }
}
69
70/// Match details captured during schema resolution.
71struct ResolvedSchema<'a> {
72    uri: String,
73    source: SchemaSource,
74    /// Present only for catalog matches.
75    catalog_match: Option<CatalogMatchInfo<'a>>,
76    /// Present only for config matches.
77    config_pattern: Option<&'a str>,
78}
79
/// Borrowed view of a catalog entry that matched a file.
///
/// All fields borrow from the compiled catalog the match came from.
struct CatalogMatchInfo<'a> {
    /// The pattern that matched the file.
    matched_pattern: &'a str,
    /// Every file-match pattern listed for the catalog entry.
    file_match: &'a [String],
    /// Name of the schema in the catalog.
    name: &'a str,
    /// Optional description of the schema.
    description: Option<&'a str>,
}
87
88impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
89    fn from(m: SchemaMatch<'a>) -> Self {
90        Self {
91            matched_pattern: m.matched_pattern,
92            file_match: m.file_match,
93            name: m.name,
94            description: m.description,
95        }
96    }
97}
98
99// ---------------------------------------------------------------------------
100// Resolved file schema — reusable by `lintel explain`
101// ---------------------------------------------------------------------------
102
/// Result of resolving a schema for a given file path.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that callers can log,
/// duplicate and compare resolution results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFileSchema {
    /// The final schema URI (after rewrites and path resolution).
    pub schema_uri: String,
    /// A human-readable name (from catalog or URI).
    pub display_name: String,
    /// Whether the schema is a remote URL.
    pub is_remote: bool,
}
112
113/// Build a [`SchemaCache`] from [`CliCacheOptions`].
114pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
115    let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
116    if let Some(dir) = &cache.cache_dir {
117        builder = builder.cache_dir(PathBuf::from(dir));
118    }
119    if let Some(ttl) = cache.schema_cache_ttl {
120        builder = builder.ttl(ttl);
121    }
122    builder.build()
123}
124
125/// Resolve the schema URI for a file path using the same priority as validation:
126/// 1. Inline `$schema` / YAML modeline
127/// 2. Custom schema mappings from `lintel.toml [schemas]`
128/// 3. Catalog matching
129///
130/// # Errors
131///
132/// Returns an error if the file cannot be read.
133#[allow(clippy::missing_panics_doc)]
134pub async fn resolve_schema_for_file(
135    file_path: &Path,
136    cache: &CliCacheOptions,
137) -> Result<Option<ResolvedFileSchema>> {
138    let path_str = file_path.display().to_string();
139    let content =
140        std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
141
142    resolve_schema_for_content(&content, file_path, None, cache).await
143}
144
145/// Resolve a schema from in-memory content and a virtual file path.
146///
147/// Uses `file_path` for extension detection and catalog matching, and
148/// `config_search_dir` for locating `lintel.toml` (falls back to
149/// `file_path.parent()` when `None`).
150///
151/// Resolution order: inline `$schema` > config > catalogs.
152///
153/// # Errors
154///
155/// Returns an error if catalogs cannot be fetched.
156#[allow(clippy::missing_panics_doc)]
157pub async fn resolve_schema_for_content(
158    content: &str,
159    file_path: &Path,
160    config_search_dir: Option<&Path>,
161    cache: &CliCacheOptions,
162) -> Result<Option<ResolvedFileSchema>> {
163    let path_str = file_path.display().to_string();
164    let file_name = file_path
165        .file_name()
166        .and_then(|n| n.to_str())
167        .unwrap_or(&path_str);
168
169    let retriever = build_retriever(cache);
170
171    let search_dir = config_search_dir
172        .map(Path::to_path_buf)
173        .or_else(|| file_path.parent().map(Path::to_path_buf));
174    let (cfg, config_dir, _config_path) = validate::load_config(search_dir.as_deref());
175
176    let compiled_catalogs =
177        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
178
179    let detected_format = parsers::detect_format(file_path);
180    let (parser, instance) = parse_file(detected_format, content, &path_str);
181
182    let Some(resolved) = resolve_schema(
183        parser.as_ref(),
184        content,
185        &instance,
186        &path_str,
187        file_name,
188        &cfg,
189        &compiled_catalogs,
190    ) else {
191        return Ok(None);
192    };
193
194    let from_inline = matches!(resolved.source, SchemaSource::Inline);
195    let (schema_uri, is_remote) = finalize_uri(
196        &resolved.uri,
197        &cfg.rewrite,
198        &config_dir,
199        file_path,
200        from_inline,
201    );
202
203    let display_name = resolved
204        .catalog_match
205        .as_ref()
206        .map(|m| m.name.to_string())
207        .or_else(|| {
208            compiled_catalogs
209                .iter()
210                .find_map(|cat| cat.schema_name(&schema_uri))
211                .map(str::to_string)
212        })
213        .unwrap_or_else(|| schema_uri.clone());
214
215    Ok(Some(ResolvedFileSchema {
216        schema_uri,
217        display_name,
218        is_remote,
219    }))
220}
221
222/// Resolve the schema URI for a file path using only path-based matching:
223/// 1. Custom schema mappings from `lintel.toml [schemas]`
224/// 2. Catalog matching
225///
226/// Unlike [`resolve_schema_for_file`], this does NOT read the file or check
227/// for inline `$schema` directives. The file does not need to exist.
228///
229/// # Errors
230///
231/// Returns an error if the catalogs cannot be fetched.
232#[allow(clippy::missing_panics_doc)]
233pub async fn resolve_schema_for_path(
234    file_path: &Path,
235    cache: &CliCacheOptions,
236) -> Result<Option<ResolvedFileSchema>> {
237    let path_str = file_path.display().to_string();
238    let file_name = file_path
239        .file_name()
240        .and_then(|n| n.to_str())
241        .unwrap_or(&path_str);
242
243    let retriever = build_retriever(cache);
244
245    let config_search_dir = file_path.parent().map(Path::to_path_buf);
246    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
247
248    let compiled_catalogs =
249        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
250
251    let Some(resolved) = resolve_schema_path_only(&path_str, file_name, &cfg, &compiled_catalogs)
252    else {
253        return Ok(None);
254    };
255
256    let from_inline = matches!(resolved.source, SchemaSource::Inline);
257    let (schema_uri, is_remote) = finalize_uri(
258        &resolved.uri,
259        &cfg.rewrite,
260        &config_dir,
261        file_path,
262        from_inline,
263    );
264
265    let display_name = resolved
266        .catalog_match
267        .as_ref()
268        .map(|m| m.name.to_string())
269        .or_else(|| {
270            compiled_catalogs
271                .iter()
272                .find_map(|cat| cat.schema_name(&schema_uri))
273                .map(str::to_string)
274        })
275        .unwrap_or_else(|| schema_uri.clone());
276
277    Ok(Some(ResolvedFileSchema {
278        schema_uri,
279        display_name,
280        is_remote,
281    }))
282}
283
284// ---------------------------------------------------------------------------
285// Entry point
286// ---------------------------------------------------------------------------
287
288#[allow(clippy::missing_panics_doc, clippy::missing_errors_doc)]
289pub async fn run(args: IdentifyArgs, global: &CLIGlobalOptions) -> Result<bool> {
290    let file_path = Path::new(&args.file);
291    if !file_path.exists() {
292        anyhow::bail!("file not found: {}", args.file);
293    }
294
295    let content = std::fs::read_to_string(file_path)
296        .with_context(|| format!("failed to read {}", args.file))?;
297
298    let path_str = file_path.display().to_string();
299    let file_name = file_path
300        .file_name()
301        .and_then(|n| n.to_str())
302        .unwrap_or(&path_str);
303
304    let retriever = build_retriever(&args.cache);
305
306    let config_search_dir = file_path.parent().map(Path::to_path_buf);
307    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
308
309    let compiled_catalogs =
310        validate::fetch_compiled_catalogs(&retriever, &cfg, args.cache.no_catalog).await;
311
312    let detected_format = parsers::detect_format(file_path);
313    let (parser, instance) = parse_file(detected_format, &content, &path_str);
314
315    let Some(resolved) = resolve_schema(
316        parser.as_ref(),
317        &content,
318        &instance,
319        &path_str,
320        file_name,
321        &cfg,
322        &compiled_catalogs,
323    ) else {
324        eprintln!("{path_str}");
325        eprintln!("  no schema found");
326        return Ok(false);
327    };
328
329    let from_inline = matches!(resolved.source, SchemaSource::Inline);
330    let (schema_uri, is_remote) = finalize_uri(
331        &resolved.uri,
332        &cfg.rewrite,
333        &config_dir,
334        file_path,
335        from_inline,
336    );
337
338    let display_name = resolved
339        .catalog_match
340        .as_ref()
341        .map(|m| m.name)
342        .or_else(|| {
343            compiled_catalogs
344                .iter()
345                .find_map(|cat| cat.schema_name(&schema_uri))
346        })
347        .unwrap_or(&schema_uri);
348
349    print_identification(&path_str, &schema_uri, display_name, &resolved);
350
351    if args.explain {
352        run_explain(
353            &args,
354            global,
355            &schema_uri,
356            display_name,
357            is_remote,
358            &retriever,
359        )
360        .await?;
361    }
362
363    Ok(false)
364}
365
366/// Try each resolution source in priority order, returning `None` if no schema is found.
367#[allow(clippy::too_many_arguments)]
368fn resolve_schema<'a>(
369    parser: &dyn parsers::Parser,
370    content: &str,
371    instance: &serde_json::Value,
372    path_str: &str,
373    file_name: &'a str,
374    cfg: &'a lintel_config::Config,
375    catalogs: &'a [schema_catalog::CompiledCatalog],
376) -> Option<ResolvedSchema<'a>> {
377    if let Some(uri) = parser.extract_schema_uri(content, instance) {
378        return Some(ResolvedSchema {
379            uri,
380            source: SchemaSource::Inline,
381            catalog_match: None,
382            config_pattern: None,
383        });
384    }
385
386    resolve_schema_path_only(path_str, file_name, cfg, catalogs)
387}
388
389/// Try config mappings and catalog matching only (no inline `$schema`).
390fn resolve_schema_path_only<'a>(
391    path_str: &str,
392    file_name: &'a str,
393    cfg: &'a lintel_config::Config,
394    catalogs: &'a [schema_catalog::CompiledCatalog],
395) -> Option<ResolvedSchema<'a>> {
396    if let Some((pattern, url)) = cfg
397        .schemas
398        .iter()
399        .find(|(pattern, _)| {
400            let p = path_str.strip_prefix("./").unwrap_or(path_str);
401            glob_match::glob_match(pattern, p) || glob_match::glob_match(pattern, file_name)
402        })
403        .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
404    {
405        return Some(ResolvedSchema {
406            uri: url.to_string(),
407            source: SchemaSource::Config,
408            catalog_match: None,
409            config_pattern: Some(pattern),
410        });
411    }
412
413    catalogs
414        .iter()
415        .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
416        .map(|schema_match| ResolvedSchema {
417            uri: schema_match.url.to_string(),
418            source: SchemaSource::Catalog,
419            catalog_match: Some(schema_match.into()),
420            config_pattern: None,
421        })
422}
423
424/// Apply rewrites, resolve relative paths, and determine whether the URI is remote.
425///
426/// When `from_inline` is true, relative paths resolve against the file's parent
427/// directory (inline `$schema`). Otherwise they resolve against the config
428/// directory where `lintel.toml` lives.
429#[allow(clippy::too_many_arguments)]
430fn finalize_uri(
431    raw_uri: &str,
432    rewrites: &HashMap<String, String>,
433    config_dir: &Path,
434    file_path: &Path,
435    from_inline: bool,
436) -> (String, bool) {
437    let schema_uri = lintel_config::apply_rewrites(raw_uri, rewrites);
438    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
439
440    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
441    let schema_uri = if is_remote {
442        schema_uri
443    } else {
444        let base_dir = if from_inline {
445            file_path.parent()
446        } else {
447            Some(config_dir)
448        };
449        base_dir
450            .map(|dir| dir.join(&schema_uri).to_string_lossy().to_string())
451            .unwrap_or(schema_uri)
452    };
453
454    (schema_uri, is_remote)
455}
456
457/// Print the identification summary to stdout.
458fn print_identification(
459    path_str: &str,
460    schema_uri: &str,
461    display_name: &str,
462    resolved: &ResolvedSchema<'_>,
463) {
464    println!("{path_str}");
465    if display_name == schema_uri {
466        println!("  schema: {schema_uri}");
467    } else {
468        println!("  schema: {display_name} ({schema_uri})");
469    }
470    println!("  source: {}", resolved.source);
471
472    match &resolved.source {
473        SchemaSource::Inline => {}
474        SchemaSource::Config => {
475            if let Some(pattern) = resolved.config_pattern {
476                println!("  matched: {pattern}");
477            }
478        }
479        SchemaSource::Catalog => {
480            if let Some(ref m) = resolved.catalog_match {
481                println!("  matched: {}", m.matched_pattern);
482                if m.file_match.len() > 1 {
483                    let globs = m
484                        .file_match
485                        .iter()
486                        .map(String::as_str)
487                        .collect::<Vec<_>>()
488                        .join(", ");
489                    println!("  globs: {globs}");
490                }
491                if let Some(desc) = m.description {
492                    println!("  description: {desc}");
493                }
494            }
495        }
496    }
497}
498
499/// Fetch the schema and render its documentation.
500#[allow(clippy::too_many_arguments)]
501async fn run_explain(
502    args: &IdentifyArgs,
503    global: &CLIGlobalOptions,
504    schema_uri: &str,
505    display_name: &str,
506    is_remote: bool,
507    retriever: &SchemaCache,
508) -> Result<()> {
509    let schema_value = if is_remote {
510        match retriever.fetch(schema_uri).await {
511            Ok((val, _)) => val,
512            Err(e) => {
513                eprintln!("  error fetching schema: {e}");
514                return Ok(());
515            }
516        }
517    } else {
518        let schema_content = std::fs::read_to_string(schema_uri)
519            .with_context(|| format!("failed to read schema: {schema_uri}"))?;
520        serde_json::from_str(&schema_content)
521            .with_context(|| format!("failed to parse schema: {schema_uri}"))?
522    };
523
524    let is_tty = std::io::stdout().is_terminal();
525    let use_color = match global.colors {
526        Some(lintel_cli_common::ColorsArg::Force) => true,
527        Some(lintel_cli_common::ColorsArg::Off) => false,
528        None => is_tty,
529    };
530    let opts = jsonschema_explain::ExplainOptions {
531        color: use_color,
532        syntax_highlight: use_color && !args.no_syntax_highlighting,
533        width: terminal_size::terminal_size()
534            .map(|(w, _)| w.0 as usize)
535            .or_else(|| std::env::var("COLUMNS").ok()?.parse().ok())
536            .unwrap_or(80),
537        validation_errors: vec![],
538    };
539    let output = jsonschema_explain::explain(&schema_value, display_name, &opts);
540
541    if is_tty && !args.no_pager {
542        lintel_cli_common::pipe_to_pager(&format!("\n{output}"));
543    } else {
544        println!();
545        print!("{output}");
546    }
547    Ok(())
548}
549
550/// Parse the file content, trying the detected format first, then all parsers as fallback.
551///
552/// Exits the process when the file cannot be parsed.
553fn parse_file(
554    detected_format: Option<FileFormat>,
555    content: &str,
556    path_str: &str,
557) -> (Box<dyn parsers::Parser>, serde_json::Value) {
558    if let Some(fmt) = detected_format {
559        let parser = parsers::parser_for(fmt);
560        if let Ok(val) = parser.parse(content, path_str) {
561            return (parser, val);
562        }
563        // Try all parsers as fallback
564        if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
565            return (parsers::parser_for(fmt), val);
566        }
567        eprintln!("{path_str}");
568        eprintln!("  no schema found (file could not be parsed)");
569        std::process::exit(0);
570    }
571
572    if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
573        return (parsers::parser_for(fmt), val);
574    }
575
576    eprintln!("{path_str}");
577    eprintln!("  no schema found (unrecognized format)");
578    std::process::exit(0);
579}
580
#[cfg(test)]
mod tests {
    use super::*;

    use bpaf::Parser;
    use lintel_cli_common::cli_global_options;

    /// Option parser combining the global options with the identify arguments,
    /// mirroring how the binary wires them together.
    fn test_cli() -> bpaf::OptionParser<(CLIGlobalOptions, IdentifyArgs)> {
        let combined = bpaf::construct!(cli_global_options(), identify_args());
        combined.to_options().descr("test identify args")
    }

    /// A lone positional argument leaves every flag at its default.
    #[test]
    fn cli_parses_identify_basic() -> anyhow::Result<()> {
        let (_global, parsed) = test_cli()
            .run_inner(&["file.json"])
            .map_err(|e| anyhow::anyhow!("{e:?}"))?;

        assert_eq!(parsed.file, "file.json");
        assert!(!parsed.explain);

        let cache = &parsed.cache;
        assert!(!cache.no_catalog);
        assert!(!cache.force_schema_fetch);
        assert!(cache.cache_dir.is_none());
        assert!(cache.schema_cache_ttl.is_none());
        Ok(())
    }

    /// `--explain` may follow the positional argument.
    #[test]
    fn cli_parses_identify_explain() -> anyhow::Result<()> {
        let (_global, parsed) = test_cli()
            .run_inner(&["file.json", "--explain"])
            .map_err(|e| anyhow::anyhow!("{e:?}"))?;

        assert_eq!(parsed.file, "file.json");
        assert!(parsed.explain);
        Ok(())
    }

    /// `--no-catalog` ends up in the shared cache options.
    #[test]
    fn cli_parses_identify_no_catalog() -> anyhow::Result<()> {
        let (_global, parsed) = test_cli()
            .run_inner(&["--no-catalog", "file.json"])
            .map_err(|e| anyhow::anyhow!("{e:?}"))?;

        assert_eq!(parsed.file, "file.json");
        assert!(parsed.cache.no_catalog);
        Ok(())
    }

    /// Every option at once, including values for the cache directory and TTL.
    #[test]
    fn cli_parses_identify_all_options() -> anyhow::Result<()> {
        let (_global, parsed) = test_cli()
            .run_inner(&[
                "--explain",
                "--no-catalog",
                "--force-schema-fetch",
                "--cache-dir",
                "/tmp/cache",
                "--schema-cache-ttl",
                "30m",
                "tsconfig.json",
            ])
            .map_err(|e| anyhow::anyhow!("{e:?}"))?;

        assert_eq!(parsed.file, "tsconfig.json");
        assert!(parsed.explain);

        let cache = &parsed.cache;
        assert!(cache.no_catalog);
        assert!(cache.force_schema_fetch);
        assert_eq!(cache.cache_dir.as_deref(), Some("/tmp/cache"));
        assert_eq!(
            cache.schema_cache_ttl,
            Some(core::time::Duration::from_secs(30 * 60))
        );
        Ok(())
    }
}