Skip to main content

lintel_identify/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::io::IsTerminal;
5use std::path::{Path, PathBuf};
6
7use anyhow::{Context, Result};
8use bpaf::Bpaf;
9use lintel_cli_common::{CLIGlobalOptions, CliCacheOptions};
10
11use lintel_check::config;
12use lintel_check::parsers;
13use lintel_check::retriever::SchemaCache;
14use lintel_check::validate;
15use schemastore::SchemaMatch;
16
17// ---------------------------------------------------------------------------
18// CLI args
19// ---------------------------------------------------------------------------
20
// Command-line arguments for the identify command.
//
// NOTE(review): the comments added here are plain `//` comments on purpose.
// bpaf's derive macro is documented to use `///` comments on fields as
// `--help` text, so the `///` lines below are user-facing strings and are
// left exactly as they were.
#[derive(Debug, Clone, Bpaf)]
#[bpaf(generate(identify_args_inner))]
pub struct IdentifyArgs {
    /// Show detailed schema documentation
    #[bpaf(long("explain"), switch)]
    pub explain: bool,

    // Parsed by the external `lintel_cli_common::cli_cache_options` parser.
    #[bpaf(external(lintel_cli_common::cli_cache_options))]
    pub cache: CliCacheOptions,

    // Read by `run_explain`, where it turns off `syntax_highlight`.
    /// Disable syntax highlighting in code blocks
    #[bpaf(long("no-syntax-highlighting"), switch)]
    pub no_syntax_highlighting: bool,

    // Read by `run_explain`; the pager is otherwise used when stdout is a terminal.
    /// Print output directly instead of piping through a pager
    #[bpaf(long("no-pager"), switch)]
    pub no_pager: bool,

    // Interpreted as a filesystem path by `run` (`Path::new(&args.file)`).
    /// File to identify
    #[bpaf(positional("FILE"))]
    pub file: String,
}
43
/// Construct the bpaf parser for `IdentifyArgs`.
///
/// Thin public wrapper around `identify_args_inner`, the parser function
/// named in the `#[bpaf(generate(identify_args_inner))]` attribute on
/// [`IdentifyArgs`].
pub fn identify_args() -> impl bpaf::Parser<IdentifyArgs> {
    identify_args_inner()
}
48
49// ---------------------------------------------------------------------------
50// Internal types
51// ---------------------------------------------------------------------------
52
/// Which resolution step supplied the schema URI for a file.
#[derive(Debug)]
enum SchemaSource {
    /// The URI was extracted from the file content itself.
    Inline,
    /// The URI came from a pattern in the configuration's schema mappings.
    Config,
    /// The URI came from a catalog match.
    Catalog,
}

impl core::fmt::Display for SchemaSource {
    /// Writes the lowercase label printed on the `source:` line.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = match self {
            Self::Inline => "inline",
            Self::Config => "config",
            Self::Catalog => "catalog",
        };
        f.write_str(label)
    }
}
70
/// Match details captured during schema resolution.
///
/// Produced by `resolve_schema`. The `'a` borrows point into the
/// configuration and the compiled catalogs that were passed to it.
struct ResolvedSchema<'a> {
    /// Schema URI as found by the resolution step, before `finalize_uri`
    /// applies rewrites and path resolution.
    uri: String,
    /// Which resolution step produced `uri`.
    source: SchemaSource,
    /// Present only for catalog matches.
    catalog_match: Option<CatalogMatchInfo<'a>>,
    /// Present only for config matches.
    /// Holds the pattern from the config's `schemas` entries that matched.
    config_pattern: Option<&'a str>,
}
80
/// Details from a catalog match, borrowed from the `CompiledCatalog`.
///
/// Built from a `SchemaMatch` via the `From` impl and consumed by
/// `print_identification`.
struct CatalogMatchInfo<'a> {
    /// Printed on the `matched:` line.
    matched_pattern: &'a str,
    /// Printed on the `globs:` line, but only when it has more than one entry.
    file_match: &'a [String],
    /// Used as the display name of the schema.
    name: &'a str,
    /// Printed on the `description:` line when present.
    description: Option<&'a str>,
}
88
89impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
90    fn from(m: SchemaMatch<'a>) -> Self {
91        Self {
92            matched_pattern: m.matched_pattern,
93            file_match: m.file_match,
94            name: m.name,
95            description: m.description,
96        }
97    }
98}
99
100// ---------------------------------------------------------------------------
101// Resolved file schema — reusable by `lintel explain`
102// ---------------------------------------------------------------------------
103
/// Result of resolving a schema for a given file path.
///
/// Returned by `resolve_schema_for_file`. All fields are owned, so the value
/// can outlive the configuration and catalogs used to compute it.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy
/// and compare results (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFileSchema {
    /// The final schema URI (after rewrites and path resolution).
    pub schema_uri: String,
    /// A human-readable name (from catalog or URI).
    pub display_name: String,
    /// Whether the schema is a remote URL.
    pub is_remote: bool,
}
113
114/// Build a [`SchemaCache`] from [`CliCacheOptions`].
115pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
116    let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
117    if let Some(dir) = &cache.cache_dir {
118        builder = builder.cache_dir(PathBuf::from(dir));
119    }
120    if let Some(ttl) = cache.schema_cache_ttl {
121        builder = builder.ttl(ttl);
122    }
123    builder.build()
124}
125
126/// Resolve the schema URI for a file path using the same priority as validation:
127/// 1. Inline `$schema` / YAML modeline
128/// 2. Custom schema mappings from `lintel.toml [schemas]`
129/// 3. Catalog matching
130///
131/// # Errors
132///
133/// Returns an error if the file cannot be read.
134#[allow(clippy::missing_panics_doc)]
135pub async fn resolve_schema_for_file(
136    file_path: &Path,
137    cache: &CliCacheOptions,
138) -> Result<Option<ResolvedFileSchema>> {
139    let path_str = file_path.display().to_string();
140    let file_name = file_path
141        .file_name()
142        .and_then(|n| n.to_str())
143        .unwrap_or(&path_str);
144
145    let content =
146        std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
147
148    let retriever = build_retriever(cache);
149
150    let config_search_dir = file_path.parent().map(Path::to_path_buf);
151    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
152
153    let compiled_catalogs =
154        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
155
156    let detected_format = parsers::detect_format(file_path);
157    let (parser, instance) = parse_file(detected_format, &content, &path_str);
158
159    let Some(resolved) = resolve_schema(
160        parser.as_ref(),
161        &content,
162        &instance,
163        &path_str,
164        file_name,
165        &cfg,
166        &compiled_catalogs,
167    ) else {
168        return Ok(None);
169    };
170
171    let (schema_uri, is_remote) = finalize_uri(&resolved.uri, &cfg.rewrite, &config_dir, file_path);
172
173    let display_name = resolved
174        .catalog_match
175        .as_ref()
176        .map(|m| m.name.to_string())
177        .or_else(|| {
178            compiled_catalogs
179                .iter()
180                .find_map(|cat| cat.schema_name(&schema_uri))
181                .map(str::to_string)
182        })
183        .unwrap_or_else(|| schema_uri.clone());
184
185    Ok(Some(ResolvedFileSchema {
186        schema_uri,
187        display_name,
188        is_remote,
189    }))
190}
191
192// ---------------------------------------------------------------------------
193// Entry point
194// ---------------------------------------------------------------------------
195
196#[allow(clippy::missing_panics_doc, clippy::missing_errors_doc)]
197pub async fn run(args: IdentifyArgs, global: &CLIGlobalOptions) -> Result<bool> {
198    let file_path = Path::new(&args.file);
199    if !file_path.exists() {
200        anyhow::bail!("file not found: {}", args.file);
201    }
202
203    let content = std::fs::read_to_string(file_path)
204        .with_context(|| format!("failed to read {}", args.file))?;
205
206    let path_str = file_path.display().to_string();
207    let file_name = file_path
208        .file_name()
209        .and_then(|n| n.to_str())
210        .unwrap_or(&path_str);
211
212    let retriever = build_retriever(&args.cache);
213
214    let config_search_dir = file_path.parent().map(Path::to_path_buf);
215    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
216
217    let compiled_catalogs =
218        validate::fetch_compiled_catalogs(&retriever, &cfg, args.cache.no_catalog).await;
219
220    let detected_format = parsers::detect_format(file_path);
221    let (parser, instance) = parse_file(detected_format, &content, &path_str);
222
223    let Some(resolved) = resolve_schema(
224        parser.as_ref(),
225        &content,
226        &instance,
227        &path_str,
228        file_name,
229        &cfg,
230        &compiled_catalogs,
231    ) else {
232        eprintln!("{path_str}");
233        eprintln!("  no schema found");
234        return Ok(false);
235    };
236
237    let (schema_uri, is_remote) = finalize_uri(&resolved.uri, &cfg.rewrite, &config_dir, file_path);
238
239    let display_name = resolved
240        .catalog_match
241        .as_ref()
242        .map(|m| m.name)
243        .or_else(|| {
244            compiled_catalogs
245                .iter()
246                .find_map(|cat| cat.schema_name(&schema_uri))
247        })
248        .unwrap_or(&schema_uri);
249
250    print_identification(&path_str, &schema_uri, display_name, &resolved);
251
252    if args.explain {
253        run_explain(
254            &args,
255            global,
256            &schema_uri,
257            display_name,
258            is_remote,
259            &retriever,
260        )
261        .await?;
262    }
263
264    Ok(false)
265}
266
267/// Try each resolution source in priority order, returning `None` if no schema is found.
268#[allow(clippy::too_many_arguments)]
269fn resolve_schema<'a>(
270    parser: &dyn parsers::Parser,
271    content: &str,
272    instance: &serde_json::Value,
273    path_str: &str,
274    file_name: &'a str,
275    cfg: &'a config::Config,
276    catalogs: &'a [lintel_check::catalog::CompiledCatalog],
277) -> Option<ResolvedSchema<'a>> {
278    if let Some(uri) = parser.extract_schema_uri(content, instance) {
279        return Some(ResolvedSchema {
280            uri,
281            source: SchemaSource::Inline,
282            catalog_match: None,
283            config_pattern: None,
284        });
285    }
286
287    if let Some((pattern, url)) = cfg
288        .schemas
289        .iter()
290        .find(|(pattern, _)| {
291            let p = path_str.strip_prefix("./").unwrap_or(path_str);
292            glob_match::glob_match(pattern, p) || glob_match::glob_match(pattern, file_name)
293        })
294        .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
295    {
296        return Some(ResolvedSchema {
297            uri: url.to_string(),
298            source: SchemaSource::Config,
299            catalog_match: None,
300            config_pattern: Some(pattern),
301        });
302    }
303
304    catalogs
305        .iter()
306        .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
307        .map(|schema_match| ResolvedSchema {
308            uri: schema_match.url.to_string(),
309            source: SchemaSource::Catalog,
310            catalog_match: Some(schema_match.into()),
311            config_pattern: None,
312        })
313}
314
315/// Apply rewrites, resolve relative paths, and determine whether the URI is remote.
316fn finalize_uri(
317    raw_uri: &str,
318    rewrites: &HashMap<String, String>,
319    config_dir: &Path,
320    file_path: &Path,
321) -> (String, bool) {
322    let schema_uri = config::apply_rewrites(raw_uri, rewrites);
323    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
324
325    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
326    let schema_uri = if is_remote {
327        schema_uri
328    } else {
329        file_path
330            .parent()
331            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
332            .unwrap_or(schema_uri)
333    };
334
335    (schema_uri, is_remote)
336}
337
338/// Print the identification summary to stdout.
339fn print_identification(
340    path_str: &str,
341    schema_uri: &str,
342    display_name: &str,
343    resolved: &ResolvedSchema<'_>,
344) {
345    println!("{path_str}");
346    if display_name == schema_uri {
347        println!("  schema: {schema_uri}");
348    } else {
349        println!("  schema: {display_name} ({schema_uri})");
350    }
351    println!("  source: {}", resolved.source);
352
353    match &resolved.source {
354        SchemaSource::Inline => {}
355        SchemaSource::Config => {
356            if let Some(pattern) = resolved.config_pattern {
357                println!("  matched: {pattern}");
358            }
359        }
360        SchemaSource::Catalog => {
361            if let Some(ref m) = resolved.catalog_match {
362                println!("  matched: {}", m.matched_pattern);
363                if m.file_match.len() > 1 {
364                    let globs = m
365                        .file_match
366                        .iter()
367                        .map(String::as_str)
368                        .collect::<Vec<_>>()
369                        .join(", ");
370                    println!("  globs: {globs}");
371                }
372                if let Some(desc) = m.description {
373                    println!("  description: {desc}");
374                }
375            }
376        }
377    }
378}
379
380/// Fetch the schema and render its documentation.
381#[allow(clippy::too_many_arguments)]
382async fn run_explain(
383    args: &IdentifyArgs,
384    global: &CLIGlobalOptions,
385    schema_uri: &str,
386    display_name: &str,
387    is_remote: bool,
388    retriever: &SchemaCache,
389) -> Result<()> {
390    let schema_value = if is_remote {
391        match retriever.fetch(schema_uri).await {
392            Ok((val, _)) => val,
393            Err(e) => {
394                eprintln!("  error fetching schema: {e}");
395                return Ok(());
396            }
397        }
398    } else {
399        let schema_content = std::fs::read_to_string(schema_uri)
400            .with_context(|| format!("failed to read schema: {schema_uri}"))?;
401        serde_json::from_str(&schema_content)
402            .with_context(|| format!("failed to parse schema: {schema_uri}"))?
403    };
404
405    let is_tty = std::io::stdout().is_terminal();
406    let use_color = match global.colors {
407        Some(lintel_cli_common::ColorsArg::Force) => true,
408        Some(lintel_cli_common::ColorsArg::Off) => false,
409        None => is_tty,
410    };
411    let opts = jsonschema_explain::ExplainOptions {
412        color: use_color,
413        syntax_highlight: use_color && !args.no_syntax_highlighting,
414        width: terminal_size::terminal_size()
415            .map(|(w, _)| w.0 as usize)
416            .or_else(|| std::env::var("COLUMNS").ok()?.parse().ok())
417            .unwrap_or(80),
418        validation_errors: vec![],
419    };
420    let output = jsonschema_explain::explain(&schema_value, display_name, &opts);
421
422    if is_tty && !args.no_pager {
423        lintel_cli_common::pipe_to_pager(&format!("\n{output}"));
424    } else {
425        println!();
426        print!("{output}");
427    }
428    Ok(())
429}
430
431/// Parse the file content, trying the detected format first, then all parsers as fallback.
432///
433/// Exits the process when the file cannot be parsed.
434fn parse_file(
435    detected_format: Option<parsers::FileFormat>,
436    content: &str,
437    path_str: &str,
438) -> (Box<dyn parsers::Parser>, serde_json::Value) {
439    if let Some(fmt) = detected_format {
440        let parser = parsers::parser_for(fmt);
441        if let Ok(val) = parser.parse(content, path_str) {
442            return (parser, val);
443        }
444        // Try all parsers as fallback
445        if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
446            return (parsers::parser_for(fmt), val);
447        }
448        eprintln!("{path_str}");
449        eprintln!("  no schema found (file could not be parsed)");
450        std::process::exit(0);
451    }
452
453    if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
454        return (parsers::parser_for(fmt), val);
455    }
456
457    eprintln!("{path_str}");
458    eprintln!("  no schema found (unrecognized format)");
459    std::process::exit(0);
460}
461
#[cfg(test)]
mod tests {
    use super::*;

    use bpaf::Parser;
    use lintel_cli_common::cli_global_options;

    /// Build an option parser shaped like the binary's: global options
    /// followed by the identify arguments.
    fn test_cli() -> bpaf::OptionParser<(CLIGlobalOptions, IdentifyArgs)> {
        let combined = bpaf::construct!(cli_global_options(), identify_args());
        combined.to_options().descr("test identify args")
    }

    /// Wrap a parse failure in an `anyhow` error carrying its `Debug` rendering.
    fn debug_err<E: core::fmt::Debug>(failure: E) -> anyhow::Error {
        anyhow::anyhow!("{failure:?}")
    }

    /// A bare file argument leaves every flag and cache option at its default.
    #[test]
    fn cli_parses_identify_basic() -> anyhow::Result<()> {
        let parsed = test_cli().run_inner(&["file.json"]).map_err(debug_err)?;
        let args = parsed.1;

        assert_eq!(args.file, "file.json");
        assert!(!args.explain);
        assert!(!args.cache.no_catalog);
        assert!(!args.cache.force_schema_fetch);
        assert!(args.cache.cache_dir.is_none());
        assert!(args.cache.schema_cache_ttl.is_none());
        Ok(())
    }

    /// `--explain` is accepted after the positional file argument.
    #[test]
    fn cli_parses_identify_explain() -> anyhow::Result<()> {
        let parsed = test_cli()
            .run_inner(&["file.json", "--explain"])
            .map_err(debug_err)?;
        let args = parsed.1;

        assert_eq!(args.file, "file.json");
        assert!(args.explain);
        Ok(())
    }

    /// `--no-catalog` is accepted before the positional file argument.
    #[test]
    fn cli_parses_identify_no_catalog() -> anyhow::Result<()> {
        let parsed = test_cli()
            .run_inner(&["--no-catalog", "file.json"])
            .map_err(debug_err)?;
        let args = parsed.1;

        assert_eq!(args.file, "file.json");
        assert!(args.cache.no_catalog);
        Ok(())
    }

    /// Every option supplied at once is reflected in the parsed arguments.
    #[test]
    fn cli_parses_identify_all_options() -> anyhow::Result<()> {
        let parsed = test_cli()
            .run_inner(&[
                "--explain",
                "--no-catalog",
                "--force-schema-fetch",
                "--cache-dir",
                "/tmp/cache",
                "--schema-cache-ttl",
                "30m",
                "tsconfig.json",
            ])
            .map_err(debug_err)?;
        let args = parsed.1;

        assert_eq!(args.file, "tsconfig.json");
        assert!(args.explain);
        assert!(args.cache.no_catalog);
        assert!(args.cache.force_schema_fetch);
        assert_eq!(args.cache.cache_dir.as_deref(), Some("/tmp/cache"));
        assert_eq!(
            args.cache.schema_cache_ttl,
            Some(core::time::Duration::from_secs(30 * 60))
        );
        Ok(())
    }
}