Skip to main content

lintel_explain/
resolve.rs

1//! Schema resolution for files.
2//!
3//! Resolves a schema URI for a given file path using priority order:
4//! 1. Inline `$schema` / YAML modeline
5//! 2. Custom schema mappings from `lintel.toml [schemas]`
6//! 3. Catalog matching
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11use anyhow::{Context, Result};
12
13use lintel_cli_common::CliCacheOptions;
14use lintel_schema_cache::SchemaCache;
15use lintel_validate::parsers;
16use lintel_validate::validate;
17use schema_catalog::{FileFormat, SchemaMatch};
18
19// ---------------------------------------------------------------------------
20// Public types
21// ---------------------------------------------------------------------------
22
/// The source that resolved the schema URI for a file.
///
/// Variants are declared in resolution priority order (highest first):
/// inline directive, then `lintel.toml` mappings, then catalog matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SchemaSource {
    /// An inline `$schema` / YAML modeline found in the file itself.
    Inline,
    /// A custom schema mapping from `lintel.toml [schemas]`.
    Config,
    /// A schema catalog entry whose file-match pattern matched the path.
    Catalog,
}
30
31impl core::fmt::Display for SchemaSource {
32    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
33        match self {
34            SchemaSource::Inline => write!(f, "inline"),
35            SchemaSource::Config => write!(f, "config"),
36            SchemaSource::Catalog => write!(f, "catalog"),
37        }
38    }
39}
40
41/// Result of resolving a schema for a given file path.
42pub struct ResolvedFileSchema {
43    /// The final schema URI (after rewrites and path resolution).
44    pub schema_uri: String,
45    /// A human-readable name (from catalog or URI).
46    pub display_name: String,
47    /// Whether the schema is a remote URL.
48    pub is_remote: bool,
49    /// How the schema was resolved.
50    pub source: SchemaSource,
51    /// The glob pattern that matched (config or catalog).
52    pub matched_pattern: Option<String>,
53    /// All file-match globs from the catalog entry.
54    pub file_match: Vec<String>,
55    /// Schema description from the catalog.
56    pub description: Option<String>,
57}
58
59// ---------------------------------------------------------------------------
60// Internal types
61// ---------------------------------------------------------------------------
62
63/// Match details captured during schema resolution.
64struct ResolvedSchema<'a> {
65    uri: String,
66    source: SchemaSource,
67    /// Present only for catalog matches.
68    catalog_match: Option<CatalogMatchInfo<'a>>,
69    /// Present only for config matches.
70    config_pattern: Option<&'a str>,
71}
72
73/// Details from a catalog match, borrowed from the `CompiledCatalog`.
74struct CatalogMatchInfo<'a> {
75    matched_pattern: &'a str,
76    file_match: &'a [String],
77    name: &'a str,
78    description: Option<&'a str>,
79}
80
81impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
82    fn from(m: SchemaMatch<'a>) -> Self {
83        Self {
84            matched_pattern: m.matched_pattern,
85            file_match: m.file_match,
86            name: m.name,
87            description: m.description,
88        }
89    }
90}
91
92// ---------------------------------------------------------------------------
93// Public functions
94// ---------------------------------------------------------------------------
95
96/// Build a [`SchemaCache`] from [`CliCacheOptions`].
97pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
98    let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
99    if let Some(dir) = &cache.cache_dir {
100        builder = builder.cache_dir(PathBuf::from(dir));
101    }
102    if let Some(ttl) = cache.schema_cache_ttl {
103        builder = builder.ttl(ttl);
104    }
105    builder.build()
106}
107
108/// Resolve the schema URI for a file path using the same priority as validation:
109/// 1. Inline `$schema` / YAML modeline
110/// 2. Custom schema mappings from `lintel.toml [schemas]`
111/// 3. Catalog matching
112///
113/// # Errors
114///
115/// Returns an error if the file cannot be read.
116#[allow(clippy::missing_panics_doc)]
117pub async fn resolve_schema_for_file(
118    file_path: &Path,
119    cache: &CliCacheOptions,
120) -> Result<Option<ResolvedFileSchema>> {
121    let path_str = file_path.display().to_string();
122    let content =
123        std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
124
125    resolve_schema_for_content(&content, file_path, None, cache).await
126}
127
128/// Resolve a schema from in-memory content and a virtual file path.
129///
130/// Uses `file_path` for extension detection and catalog matching, and
131/// `config_search_dir` for locating `lintel.toml` (falls back to
132/// `file_path.parent()` when `None`).
133///
134/// Resolution order: inline `$schema` > config > catalogs.
135///
136/// # Errors
137///
138/// Returns an error if catalogs cannot be fetched.
139#[allow(clippy::missing_panics_doc)]
140pub async fn resolve_schema_for_content(
141    content: &str,
142    file_path: &Path,
143    config_search_dir: Option<&Path>,
144    cache: &CliCacheOptions,
145) -> Result<Option<ResolvedFileSchema>> {
146    let path_str = file_path.display().to_string();
147    let file_name = file_path
148        .file_name()
149        .and_then(|n| n.to_str())
150        .unwrap_or(&path_str);
151
152    let retriever = build_retriever(cache);
153
154    let search_dir = config_search_dir
155        .map(Path::to_path_buf)
156        .or_else(|| file_path.parent().map(Path::to_path_buf));
157    let (cfg, config_dir, _config_path) = validate::load_config(search_dir.as_deref());
158
159    let compiled_catalogs =
160        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
161
162    let detected_format = parsers::detect_format(file_path);
163    let (parser, instance) = parse_file(detected_format, content, &path_str);
164
165    let Some(resolved) = resolve_schema(
166        parser.as_ref(),
167        content,
168        &instance,
169        &path_str,
170        file_name,
171        &cfg,
172        &compiled_catalogs,
173    ) else {
174        return Ok(None);
175    };
176
177    Ok(Some(build_resolved_file_schema(
178        resolved,
179        &cfg,
180        &config_dir,
181        file_path,
182        &compiled_catalogs,
183    )))
184}
185
186/// Resolve the schema URI for a file path using only path-based matching:
187/// 1. Custom schema mappings from `lintel.toml [schemas]`
188/// 2. Catalog matching
189///
190/// Unlike [`resolve_schema_for_file`], this does NOT read the file or check
191/// for inline `$schema` directives. The file does not need to exist.
192///
193/// # Errors
194///
195/// Returns an error if the catalogs cannot be fetched.
196#[allow(clippy::missing_panics_doc)]
197pub async fn resolve_schema_for_path(
198    file_path: &Path,
199    cache: &CliCacheOptions,
200) -> Result<Option<ResolvedFileSchema>> {
201    let path_str = file_path.display().to_string();
202    let file_name = file_path
203        .file_name()
204        .and_then(|n| n.to_str())
205        .unwrap_or(&path_str);
206
207    let retriever = build_retriever(cache);
208
209    let config_search_dir = file_path.parent().map(Path::to_path_buf);
210    let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
211
212    let compiled_catalogs =
213        validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
214
215    let Some(resolved) = resolve_schema_path_only(&path_str, file_name, &cfg, &compiled_catalogs)
216    else {
217        return Ok(None);
218    };
219
220    Ok(Some(build_resolved_file_schema(
221        resolved,
222        &cfg,
223        &config_dir,
224        file_path,
225        &compiled_catalogs,
226    )))
227}
228
229// ---------------------------------------------------------------------------
230// Private helpers
231// ---------------------------------------------------------------------------
232
233/// Build a `ResolvedFileSchema` from intermediate resolution data.
234#[allow(clippy::too_many_arguments)]
235fn build_resolved_file_schema(
236    resolved: ResolvedSchema<'_>,
237    cfg: &lintel_config::Config,
238    config_dir: &Path,
239    file_path: &Path,
240    compiled_catalogs: &[schema_catalog::CompiledCatalog],
241) -> ResolvedFileSchema {
242    let from_inline = matches!(resolved.source, SchemaSource::Inline);
243    let (schema_uri, is_remote) = finalize_uri(
244        &resolved.uri,
245        &cfg.rewrite,
246        config_dir,
247        file_path,
248        from_inline,
249    );
250
251    let display_name = resolved
252        .catalog_match
253        .as_ref()
254        .map(|m| m.name.to_string())
255        .or_else(|| {
256            compiled_catalogs
257                .iter()
258                .find_map(|cat| cat.schema_name(&schema_uri))
259                .map(str::to_string)
260        })
261        .unwrap_or_else(|| schema_uri.clone());
262
263    let matched_pattern = match &resolved.source {
264        SchemaSource::Config => resolved.config_pattern.map(str::to_string),
265        SchemaSource::Catalog => resolved
266            .catalog_match
267            .as_ref()
268            .map(|m| m.matched_pattern.to_string()),
269        SchemaSource::Inline => None,
270    };
271
272    let file_match = resolved
273        .catalog_match
274        .as_ref()
275        .map(|m| m.file_match.to_vec())
276        .unwrap_or_default();
277
278    let description = resolved
279        .catalog_match
280        .as_ref()
281        .and_then(|m| m.description.map(str::to_string));
282
283    ResolvedFileSchema {
284        schema_uri,
285        display_name,
286        is_remote,
287        source: resolved.source,
288        matched_pattern,
289        file_match,
290        description,
291    }
292}
293
294/// Try each resolution source in priority order, returning `None` if no schema is found.
295#[allow(clippy::too_many_arguments)]
296fn resolve_schema<'a>(
297    parser: &dyn parsers::Parser,
298    content: &str,
299    instance: &serde_json::Value,
300    path_str: &str,
301    file_name: &'a str,
302    cfg: &'a lintel_config::Config,
303    catalogs: &'a [schema_catalog::CompiledCatalog],
304) -> Option<ResolvedSchema<'a>> {
305    if let Some(uri) = parser.extract_schema_uri(content, instance) {
306        return Some(ResolvedSchema {
307            uri,
308            source: SchemaSource::Inline,
309            catalog_match: None,
310            config_pattern: None,
311        });
312    }
313
314    resolve_schema_path_only(path_str, file_name, cfg, catalogs)
315}
316
317/// Try config mappings and catalog matching only (no inline `$schema`).
318fn resolve_schema_path_only<'a>(
319    path_str: &str,
320    file_name: &'a str,
321    cfg: &'a lintel_config::Config,
322    catalogs: &'a [schema_catalog::CompiledCatalog],
323) -> Option<ResolvedSchema<'a>> {
324    if let Some((pattern, url)) = cfg
325        .schemas
326        .iter()
327        .find(|(pattern, _)| {
328            let p = path_str.strip_prefix("./").unwrap_or(path_str);
329            glob_matcher::glob_match(pattern, p) || glob_matcher::glob_match(pattern, file_name)
330        })
331        .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
332    {
333        return Some(ResolvedSchema {
334            uri: url.to_string(),
335            source: SchemaSource::Config,
336            catalog_match: None,
337            config_pattern: Some(pattern),
338        });
339    }
340
341    catalogs
342        .iter()
343        .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
344        .map(|schema_match| ResolvedSchema {
345            uri: schema_match.url.to_string(),
346            source: SchemaSource::Catalog,
347            catalog_match: Some(schema_match.into()),
348            config_pattern: None,
349        })
350}
351
352/// Apply rewrites, resolve relative paths, and determine whether the URI is remote.
353///
354/// When `from_inline` is true, relative paths resolve against the file's parent
355/// directory (inline `$schema`). Otherwise they resolve against the config
356/// directory where `lintel.toml` lives.
357#[allow(clippy::too_many_arguments)]
358fn finalize_uri(
359    raw_uri: &str,
360    rewrites: &HashMap<String, String>,
361    config_dir: &Path,
362    file_path: &Path,
363    from_inline: bool,
364) -> (String, bool) {
365    let schema_uri = lintel_config::apply_rewrites(raw_uri, rewrites);
366    let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
367
368    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
369    let schema_uri = if is_remote {
370        schema_uri
371    } else {
372        let base_dir = if from_inline {
373            file_path.parent()
374        } else {
375            Some(config_dir)
376        };
377        base_dir
378            .map(|dir| dir.join(&schema_uri).to_string_lossy().to_string())
379            .unwrap_or(schema_uri)
380    };
381
382    (schema_uri, is_remote)
383}
384
385/// Parse the file content, trying the detected format first, then all parsers as fallback.
386///
387/// Exits the process when the file cannot be parsed.
388fn parse_file(
389    detected_format: Option<FileFormat>,
390    content: &str,
391    path_str: &str,
392) -> (Box<dyn parsers::Parser>, serde_json::Value) {
393    if let Some(fmt) = detected_format {
394        let parser = parsers::parser_for(fmt);
395        if let Ok(val) = parser.parse(content, path_str) {
396            return (parser, val);
397        }
398        // Try all parsers as fallback
399        if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
400            return (parsers::parser_for(fmt), val);
401        }
402        eprintln!("{path_str}");
403        eprintln!("  no schema found (file could not be parsed)");
404        std::process::exit(0);
405    }
406
407    if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
408        return (parsers::parser_for(fmt), val);
409    }
410
411    eprintln!("{path_str}");
412    eprintln!("  no schema found (unrecognized format)");
413    std::process::exit(0);
414}