1use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11use anyhow::{Context, Result};
12
13use lintel_cli_common::CliCacheOptions;
14use lintel_schema_cache::SchemaCache;
15use lintel_validate::parsers;
16use lintel_validate::validate;
17use schema_catalog::{FileFormat, SchemaMatch};
18
/// Identifies which resolution step supplied the schema for a file.
///
/// The variants are plain markers, so the type is cheap to copy and can be
/// compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SchemaSource {
    /// The schema URI was extracted from the file's own content by its parser.
    Inline,
    /// The file matched a pattern in the `schemas` table of the loaded configuration.
    Config,
    /// The file matched an entry in one of the compiled schema catalogs.
    Catalog,
}
30
31impl core::fmt::Display for SchemaSource {
32 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
33 match self {
34 SchemaSource::Inline => write!(f, "inline"),
35 SchemaSource::Config => write!(f, "config"),
36 SchemaSource::Catalog => write!(f, "catalog"),
37 }
38 }
39}
40
41pub struct ResolvedFileSchema {
43 pub schema_uri: String,
45 pub display_name: String,
47 pub is_remote: bool,
49 pub source: SchemaSource,
51 pub matched_pattern: Option<String>,
53 pub file_match: Vec<String>,
55 pub description: Option<String>,
57}
58
59struct ResolvedSchema<'a> {
65 uri: String,
66 source: SchemaSource,
67 catalog_match: Option<CatalogMatchInfo<'a>>,
69 config_pattern: Option<&'a str>,
71}
72
/// Borrowed details of the catalog entry that matched a file.
struct CatalogMatchInfo<'a> {
    /// The specific pattern that matched the file.
    matched_pattern: &'a str,
    /// All `file_match` patterns declared by the catalog entry.
    file_match: &'a [String],
    /// Name of the catalog entry.
    name: &'a str,
    /// Description of the catalog entry, when it has one.
    description: Option<&'a str>,
}
80
81impl<'a> From<SchemaMatch<'a>> for CatalogMatchInfo<'a> {
82 fn from(m: SchemaMatch<'a>) -> Self {
83 Self {
84 matched_pattern: m.matched_pattern,
85 file_match: m.file_match,
86 name: m.name,
87 description: m.description,
88 }
89 }
90}
91
92pub fn build_retriever(cache: &CliCacheOptions) -> SchemaCache {
98 let mut builder = SchemaCache::builder().force_fetch(cache.force_schema_fetch || cache.force);
99 if let Some(dir) = &cache.cache_dir {
100 builder = builder.cache_dir(PathBuf::from(dir));
101 }
102 if let Some(ttl) = cache.schema_cache_ttl {
103 builder = builder.ttl(ttl);
104 }
105 builder.build()
106}
107
108#[allow(clippy::missing_panics_doc)]
117pub async fn resolve_schema_for_file(
118 file_path: &Path,
119 cache: &CliCacheOptions,
120) -> Result<Option<ResolvedFileSchema>> {
121 let path_str = file_path.display().to_string();
122 let content =
123 std::fs::read_to_string(file_path).with_context(|| format!("failed to read {path_str}"))?;
124
125 resolve_schema_for_content(&content, file_path, None, cache).await
126}
127
128#[allow(clippy::missing_panics_doc)]
140pub async fn resolve_schema_for_content(
141 content: &str,
142 file_path: &Path,
143 config_search_dir: Option<&Path>,
144 cache: &CliCacheOptions,
145) -> Result<Option<ResolvedFileSchema>> {
146 let path_str = file_path.display().to_string();
147 let file_name = file_path
148 .file_name()
149 .and_then(|n| n.to_str())
150 .unwrap_or(&path_str);
151
152 let retriever = build_retriever(cache);
153
154 let search_dir = config_search_dir
155 .map(Path::to_path_buf)
156 .or_else(|| file_path.parent().map(Path::to_path_buf));
157 let (cfg, config_dir, _config_path) = validate::load_config(search_dir.as_deref());
158
159 let compiled_catalogs =
160 validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
161
162 let detected_format = parsers::detect_format(file_path);
163 let (parser, instance) = parse_file(detected_format, content, &path_str);
164
165 let Some(resolved) = resolve_schema(
166 parser.as_ref(),
167 content,
168 &instance,
169 &path_str,
170 file_name,
171 &cfg,
172 &compiled_catalogs,
173 ) else {
174 return Ok(None);
175 };
176
177 Ok(Some(build_resolved_file_schema(
178 resolved,
179 &cfg,
180 &config_dir,
181 file_path,
182 &compiled_catalogs,
183 )))
184}
185
186#[allow(clippy::missing_panics_doc)]
197pub async fn resolve_schema_for_path(
198 file_path: &Path,
199 cache: &CliCacheOptions,
200) -> Result<Option<ResolvedFileSchema>> {
201 let path_str = file_path.display().to_string();
202 let file_name = file_path
203 .file_name()
204 .and_then(|n| n.to_str())
205 .unwrap_or(&path_str);
206
207 let retriever = build_retriever(cache);
208
209 let config_search_dir = file_path.parent().map(Path::to_path_buf);
210 let (cfg, config_dir, _config_path) = validate::load_config(config_search_dir.as_deref());
211
212 let compiled_catalogs =
213 validate::fetch_compiled_catalogs(&retriever, &cfg, cache.no_catalog).await;
214
215 let Some(resolved) = resolve_schema_path_only(&path_str, file_name, &cfg, &compiled_catalogs)
216 else {
217 return Ok(None);
218 };
219
220 Ok(Some(build_resolved_file_schema(
221 resolved,
222 &cfg,
223 &config_dir,
224 file_path,
225 &compiled_catalogs,
226 )))
227}
228
229#[allow(clippy::too_many_arguments)]
235fn build_resolved_file_schema(
236 resolved: ResolvedSchema<'_>,
237 cfg: &lintel_config::Config,
238 config_dir: &Path,
239 file_path: &Path,
240 compiled_catalogs: &[schema_catalog::CompiledCatalog],
241) -> ResolvedFileSchema {
242 let from_inline = matches!(resolved.source, SchemaSource::Inline);
243 let (schema_uri, is_remote) = finalize_uri(
244 &resolved.uri,
245 &cfg.rewrite,
246 config_dir,
247 file_path,
248 from_inline,
249 );
250
251 let display_name = resolved
252 .catalog_match
253 .as_ref()
254 .map(|m| m.name.to_string())
255 .or_else(|| {
256 compiled_catalogs
257 .iter()
258 .find_map(|cat| cat.schema_name(&schema_uri))
259 .map(str::to_string)
260 })
261 .unwrap_or_else(|| schema_uri.clone());
262
263 let matched_pattern = match &resolved.source {
264 SchemaSource::Config => resolved.config_pattern.map(str::to_string),
265 SchemaSource::Catalog => resolved
266 .catalog_match
267 .as_ref()
268 .map(|m| m.matched_pattern.to_string()),
269 SchemaSource::Inline => None,
270 };
271
272 let file_match = resolved
273 .catalog_match
274 .as_ref()
275 .map(|m| m.file_match.to_vec())
276 .unwrap_or_default();
277
278 let description = resolved
279 .catalog_match
280 .as_ref()
281 .and_then(|m| m.description.map(str::to_string));
282
283 ResolvedFileSchema {
284 schema_uri,
285 display_name,
286 is_remote,
287 source: resolved.source,
288 matched_pattern,
289 file_match,
290 description,
291 }
292}
293
294#[allow(clippy::too_many_arguments)]
296fn resolve_schema<'a>(
297 parser: &dyn parsers::Parser,
298 content: &str,
299 instance: &serde_json::Value,
300 path_str: &str,
301 file_name: &'a str,
302 cfg: &'a lintel_config::Config,
303 catalogs: &'a [schema_catalog::CompiledCatalog],
304) -> Option<ResolvedSchema<'a>> {
305 if let Some(uri) = parser.extract_schema_uri(content, instance) {
306 return Some(ResolvedSchema {
307 uri,
308 source: SchemaSource::Inline,
309 catalog_match: None,
310 config_pattern: None,
311 });
312 }
313
314 resolve_schema_path_only(path_str, file_name, cfg, catalogs)
315}
316
317fn resolve_schema_path_only<'a>(
319 path_str: &str,
320 file_name: &'a str,
321 cfg: &'a lintel_config::Config,
322 catalogs: &'a [schema_catalog::CompiledCatalog],
323) -> Option<ResolvedSchema<'a>> {
324 if let Some((pattern, url)) = cfg
325 .schemas
326 .iter()
327 .find(|(pattern, _)| {
328 let p = path_str.strip_prefix("./").unwrap_or(path_str);
329 glob_matcher::glob_match(pattern, p) || glob_matcher::glob_match(pattern, file_name)
330 })
331 .map(|(pattern, url)| (pattern.as_str(), url.as_str()))
332 {
333 return Some(ResolvedSchema {
334 uri: url.to_string(),
335 source: SchemaSource::Config,
336 catalog_match: None,
337 config_pattern: Some(pattern),
338 });
339 }
340
341 catalogs
342 .iter()
343 .find_map(|cat| cat.find_schema_detailed(path_str, file_name))
344 .map(|schema_match| ResolvedSchema {
345 uri: schema_match.url.to_string(),
346 source: SchemaSource::Catalog,
347 catalog_match: Some(schema_match.into()),
348 config_pattern: None,
349 })
350}
351
352#[allow(clippy::too_many_arguments)]
358fn finalize_uri(
359 raw_uri: &str,
360 rewrites: &HashMap<String, String>,
361 config_dir: &Path,
362 file_path: &Path,
363 from_inline: bool,
364) -> (String, bool) {
365 let schema_uri = lintel_config::apply_rewrites(raw_uri, rewrites);
366 let schema_uri = lintel_config::resolve_double_slash(&schema_uri, config_dir);
367
368 let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
369 let schema_uri = if is_remote {
370 schema_uri
371 } else {
372 let base_dir = if from_inline {
373 file_path.parent()
374 } else {
375 Some(config_dir)
376 };
377 base_dir
378 .map(|dir| dir.join(&schema_uri).to_string_lossy().to_string())
379 .unwrap_or(schema_uri)
380 };
381
382 (schema_uri, is_remote)
383}
384
385fn parse_file(
389 detected_format: Option<FileFormat>,
390 content: &str,
391 path_str: &str,
392) -> (Box<dyn parsers::Parser>, serde_json::Value) {
393 if let Some(fmt) = detected_format {
394 let parser = parsers::parser_for(fmt);
395 if let Ok(val) = parser.parse(content, path_str) {
396 return (parser, val);
397 }
398 if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
400 return (parsers::parser_for(fmt), val);
401 }
402 eprintln!("{path_str}");
403 eprintln!(" no schema found (file could not be parsed)");
404 std::process::exit(0);
405 }
406
407 if let Some((fmt, val)) = validate::try_parse_all(content, path_str) {
408 return (parsers::parser_for(fmt), val);
409 }
410
411 eprintln!("{path_str}");
412 eprintln!(" no schema found (unrecognized format)");
413 std::process::exit(0);
414}