1use std::fs;
2use std::path::{Path, PathBuf};
3use std::time::Duration;
4
5use anyhow::{Context, Result};
6use bpaf::{Bpaf, Parser};
7use glob::glob;
8
9use lintel_check::catalog::{self, CompiledCatalog};
10use lintel_check::config;
11use lintel_check::discover;
12use lintel_check::parsers;
13use lintel_check::registry;
14use lintel_check::retriever::{HttpClient, SchemaCache, ensure_cache_dir};
15
16#[derive(Debug, Clone, Bpaf)]
21#[bpaf(generate(annotate_args_inner))]
22pub struct AnnotateArgs {
23 #[bpaf(long("exclude"), argument("PATTERN"))]
24 pub exclude: Vec<String>,
25
26 #[bpaf(long("cache-dir"), argument("DIR"))]
27 pub cache_dir: Option<String>,
28
29 #[bpaf(long("no-catalog"), switch)]
30 pub no_catalog: bool,
31
32 #[bpaf(external(schema_cache_ttl))]
33 pub schema_cache_ttl: Option<Duration>,
34
35 #[bpaf(long("update"), switch)]
37 pub update: bool,
38
39 #[bpaf(positional("PATH"))]
40 pub globs: Vec<String>,
41}
42
43fn schema_cache_ttl() -> impl bpaf::Parser<Option<Duration>> {
44 bpaf::long("schema-cache-ttl")
45 .help("Schema cache TTL (e.g. \"12h\", \"30m\", \"1d\"); default 12h")
46 .argument::<String>("DURATION")
47 .parse(|s: String| {
48 humantime::parse_duration(&s).map_err(|e| format!("invalid duration '{s}': {e}"))
49 })
50 .optional()
51}
52
/// Returns the command-line parser for [`AnnotateArgs`].
///
/// Delegates to `annotate_args_inner`, the parser name requested from the `Bpaf` derive through
/// the `generate(annotate_args_inner)` attribute on [`AnnotateArgs`].
53pub fn annotate_args() -> impl bpaf::Parser<AnnotateArgs> {
55 annotate_args_inner()
56}
57
/// A file whose schema annotation was written, together with the schema it now points at.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnnotatedFile {
    /// Path of the file, as displayed to the user.
    pub path: String,
    /// URL of the schema written into the file.
    pub schema_url: String,
}

/// Summary of one `annotate` run.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AnnotateResult {
    /// Files that had no schema annotation and received one.
    pub annotated: Vec<AnnotatedFile>,
    /// Files whose existing schema annotation was replaced by a different one.
    pub updated: Vec<AnnotatedFile>,
    /// Number of files that were left untouched.
    pub skipped: usize,
    /// `(path, message)` pairs for files that could not be read or written.
    pub errors: Vec<(String, String)>,
}
73
74fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf) {
79 let start_dir = match search_dir {
80 Some(d) => d.to_path_buf(),
81 None => match std::env::current_dir() {
82 Ok(d) => d,
83 Err(_) => return (config::Config::default(), PathBuf::from(".")),
84 },
85 };
86
87 let cfg = config::find_and_load(&start_dir)
88 .ok()
89 .flatten()
90 .unwrap_or_default();
91 (cfg, start_dir)
92}
93
94fn collect_files(globs_arg: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
99 if globs_arg.is_empty() {
100 return discover::discover_files(".", exclude);
101 }
102
103 let mut result = Vec::new();
104 for pattern in globs_arg {
105 let path = Path::new(pattern);
106 if path.is_dir() {
107 result.extend(discover::discover_files(pattern, exclude)?);
108 } else {
109 for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
110 let path = entry?;
111 if path.is_file() && !is_excluded(&path, exclude) {
112 result.push(path);
113 }
114 }
115 }
116 }
117 Ok(result)
118}
119
120fn is_excluded(path: &Path, excludes: &[String]) -> bool {
121 let path_str = match path.to_str() {
122 Some(s) => s.strip_prefix("./").unwrap_or(s),
123 None => return false,
124 };
125 excludes
126 .iter()
127 .any(|pattern| glob_match::glob_match(pattern, path_str))
128}
129
130async fn fetch_catalogs<C: HttpClient>(
135 retriever: &SchemaCache<C>,
136 registries: &[String],
137) -> Vec<CompiledCatalog> {
138 type CatalogResult = (
139 String,
140 Result<CompiledCatalog, Box<dyn std::error::Error + Send + Sync>>,
141 );
142 let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
143
144 let r = retriever.clone();
146 let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
147 catalog_tasks.spawn(async move {
148 let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
149 .await
150 .map(|cat| CompiledCatalog::compile(&cat));
151 (label, result)
152 });
153
154 let r = retriever.clone();
156 catalog_tasks.spawn(async move {
157 let result = catalog::fetch_catalog(&r)
158 .await
159 .map(|cat| CompiledCatalog::compile(&cat));
160 ("SchemaStore catalog".to_string(), result)
161 });
162
163 for registry_url in registries {
165 let r = retriever.clone();
166 let url = registry_url.clone();
167 let label = format!("registry {url}");
168 catalog_tasks.spawn(async move {
169 let result = registry::fetch(&r, &url)
170 .await
171 .map(|cat| CompiledCatalog::compile(&cat));
172 (label, result)
173 });
174 }
175
176 let mut compiled = Vec::new();
177 while let Some(result) = catalog_tasks.join_next().await {
178 match result {
179 Ok((_, Ok(catalog))) => compiled.push(catalog),
180 Ok((label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
181 Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
182 }
183 }
184 compiled
185}
186
/// Result of handling a single file in `process_file`.
187enum FileOutcome {
    /// A schema annotation was written to a file that had none.
192 Annotated(AnnotatedFile),
    /// An existing schema annotation was replaced by a different one.
193 Updated(AnnotatedFile),
    /// The file was left untouched.
194 Skipped,
    /// Reading or writing the file failed; holds `(path, message)`.
195 Error(String, String),
196}
197
198fn process_file(
199 file_path: &Path,
200 config: &config::Config,
201 catalogs: &[CompiledCatalog],
202 update: bool,
203) -> FileOutcome {
204 let path_str = file_path.display().to_string();
205 let file_name = file_path
206 .file_name()
207 .and_then(|n| n.to_str())
208 .unwrap_or(&path_str);
209
210 let content = match fs::read_to_string(file_path) {
211 Ok(c) => c,
212 Err(e) => return FileOutcome::Error(path_str, format!("failed to read: {e}")),
213 };
214
215 let Some(fmt) = parsers::detect_format(file_path) else {
216 return FileOutcome::Skipped;
217 };
218
219 let parser = parsers::parser_for(fmt);
220 let Ok(instance) = parser.parse(&content, &path_str) else {
221 return FileOutcome::Skipped;
222 };
223
224 let existing_schema = parser.extract_schema_uri(&content, &instance);
225 if existing_schema.is_some() && !update {
226 return FileOutcome::Skipped;
227 }
228
229 let schema_url = config
230 .find_schema_mapping(&path_str, file_name)
231 .map(str::to_string)
232 .or_else(|| {
233 catalogs
234 .iter()
235 .find_map(|cat| cat.find_schema(&path_str, file_name))
236 .map(str::to_string)
237 });
238
239 let Some(schema_url) = schema_url else {
240 return FileOutcome::Skipped;
241 };
242
243 let is_update = existing_schema.is_some();
244 if existing_schema.is_some_and(|existing| existing == schema_url) {
245 return FileOutcome::Skipped;
246 }
247
248 let content = if is_update {
249 parser.strip_annotation(&content)
250 } else {
251 content
252 };
253
254 let Some(new_content) = parser.annotate(&content, &schema_url) else {
255 return FileOutcome::Skipped;
256 };
257
258 match fs::write(file_path, &new_content) {
259 Ok(()) => {
260 let file = AnnotatedFile {
261 path: path_str,
262 schema_url,
263 };
264 if is_update {
265 FileOutcome::Updated(file)
266 } else {
267 FileOutcome::Annotated(file)
268 }
269 }
270 Err(e) => FileOutcome::Error(path_str, format!("failed to write: {e}")),
271 }
272}
273
274#[tracing::instrument(skip_all, name = "annotate")]
288pub async fn run<C: HttpClient>(args: &AnnotateArgs, client: C) -> Result<AnnotateResult> {
289 let config_dir = args
290 .globs
291 .iter()
292 .find(|g| Path::new(g).is_dir())
293 .map(PathBuf::from);
294
295 let schema_cache_ttl = args.schema_cache_ttl;
296
297 let cache_dir_path = args
298 .cache_dir
299 .as_ref()
300 .map_or_else(ensure_cache_dir, PathBuf::from);
301 let retriever = SchemaCache::new(
302 Some(cache_dir_path),
303 client,
304 false, schema_cache_ttl,
306 );
307
308 let (mut config, _config_dir) = load_config(config_dir.as_deref());
309 config.exclude.extend(args.exclude.clone());
310
311 let files = collect_files(&args.globs, &config.exclude)?;
312 tracing::info!(file_count = files.len(), "collected files");
313
314 let catalogs = if args.no_catalog {
315 Vec::new()
316 } else {
317 fetch_catalogs(&retriever, &config.registries).await
318 };
319
320 let mut result = AnnotateResult {
321 annotated: Vec::new(),
322 updated: Vec::new(),
323 skipped: 0,
324 errors: Vec::new(),
325 };
326
327 for file_path in &files {
328 match process_file(file_path, &config, &catalogs, args.update) {
329 FileOutcome::Annotated(f) => result.annotated.push(f),
330 FileOutcome::Updated(f) => result.updated.push(f),
331 FileOutcome::Skipped => result.skipped += 1,
332 FileOutcome::Error(path, msg) => result.errors.push((path, msg)),
333 }
334 }
335
336 Ok(result)
337}
338
// Formatting tests for the parser-level `annotate` and `strip_annotation` operations that
// `process_file` relies on. They exercise the parsers directly and never touch the filesystem.
#[cfg(test)]
mod tests {
    use lintel_check::parsers::{
        Json5Parser, JsonParser, JsoncParser, Parser, TomlParser, YamlParser,
    };

    // --- JSON: annotate ---

    #[test]
    fn json_compact() {
        let result = JsonParser
            .annotate(r#"{"name":"hello"}"#, "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(
            result,
            r#"{"$schema":"https://example.com/schema.json","name":"hello"}"#
        );
    }

    #[test]
    fn json_pretty() {
        let result = JsonParser
            .annotate(
                "{\n \"name\": \"hello\"\n}\n",
                "https://example.com/schema.json",
            )
            .expect("annotate failed");
        assert_eq!(
            result,
            "{\n \"$schema\": \"https://example.com/schema.json\",\n \"name\": \"hello\"\n}\n"
        );
    }

    // Uses a real four-space indent so this covers a different case than `json_pretty`.
    #[test]
    fn json_pretty_4_spaces() {
        let result = JsonParser
            .annotate(
                "{\n    \"name\": \"hello\"\n}\n",
                "https://example.com/schema.json",
            )
            .expect("annotate failed");
        assert_eq!(
            result,
            "{\n    \"$schema\": \"https://example.com/schema.json\",\n    \"name\": \"hello\"\n}\n"
        );
    }

    #[test]
    fn json_pretty_tabs() {
        let result = JsonParser
            .annotate(
                "{\n\t\"name\": \"hello\"\n}\n",
                "https://example.com/schema.json",
            )
            .expect("annotate failed");
        assert_eq!(
            result,
            "{\n\t\"$schema\": \"https://example.com/schema.json\",\n\t\"name\": \"hello\"\n}\n"
        );
    }

    // NOTE(review): the pinned output ends in `,}` (a trailing comma), which strict JSON does
    // not allow. Confirm whether the annotator is meant to produce this for empty objects.
    #[test]
    fn json_empty_object() {
        let result = JsonParser
            .annotate("{}", "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(result, r#"{"$schema":"https://example.com/schema.json",}"#);
    }

    #[test]
    fn json_empty_object_pretty() {
        let result = JsonParser
            .annotate("{\n}\n", "https://example.com/schema.json")
            .expect("annotate failed");
        assert!(result.contains("\"$schema\": \"https://example.com/schema.json\""));
    }

    // --- JSON5 / JSONC: annotate ---

    #[test]
    fn json5_compact() {
        let result = Json5Parser
            .annotate(r#"{"name":"hello"}"#, "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(
            result,
            r#"{"$schema":"https://example.com/schema.json","name":"hello"}"#
        );
    }

    #[test]
    fn jsonc_compact() {
        let result = JsoncParser
            .annotate(r#"{"name":"hello"}"#, "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(
            result,
            r#"{"$schema":"https://example.com/schema.json","name":"hello"}"#
        );
    }

    // --- YAML: annotate ---

    #[test]
    fn yaml_prepends_modeline() {
        let result = YamlParser
            .annotate("name: hello\n", "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(
            result,
            "# yaml-language-server: $schema=https://example.com/schema.json\nname: hello\n"
        );
    }

    #[test]
    fn yaml_preserves_existing_comments() {
        let result = YamlParser
            .annotate(
                "# existing comment\nname: hello\n",
                "https://example.com/schema.json",
            )
            .expect("annotate failed");
        assert_eq!(
            result,
            "# yaml-language-server: $schema=https://example.com/schema.json\n# existing comment\nname: hello\n"
        );
    }

    // --- TOML: annotate ---

    #[test]
    fn toml_prepends_schema_comment() {
        let result = TomlParser
            .annotate("name = \"hello\"\n", "https://example.com/schema.json")
            .expect("annotate failed");
        assert_eq!(
            result,
            "# :schema https://example.com/schema.json\nname = \"hello\"\n"
        );
    }

    #[test]
    fn toml_preserves_existing_comments() {
        let result = TomlParser
            .annotate(
                "# existing comment\nname = \"hello\"\n",
                "https://example.com/schema.json",
            )
            .expect("annotate failed");
        assert_eq!(
            result,
            "# :schema https://example.com/schema.json\n# existing comment\nname = \"hello\"\n"
        );
    }

    // --- JSON: strip_annotation ---

    #[test]
    fn json_strip_compact_first_property() {
        let input = r#"{"$schema":"https://old.com/s.json","name":"hello"}"#;
        assert_eq!(JsonParser.strip_annotation(input), r#"{"name":"hello"}"#);
    }

    #[test]
    fn json_strip_pretty_first_property() {
        let input = "{\n \"$schema\": \"https://old.com/s.json\",\n \"name\": \"hello\"\n}\n";
        assert_eq!(
            JsonParser.strip_annotation(input),
            "{\n \"name\": \"hello\"\n}\n"
        );
    }

    #[test]
    fn json_strip_only_property() {
        let input = r#"{"$schema":"https://old.com/s.json"}"#;
        assert_eq!(JsonParser.strip_annotation(input), "{}");
    }

    #[test]
    fn json_strip_last_property() {
        let input = r#"{"name":"hello","$schema":"https://old.com/s.json"}"#;
        assert_eq!(JsonParser.strip_annotation(input), r#"{"name":"hello"}"#);
    }

    #[test]
    fn json_strip_no_schema() {
        let input = r#"{"name":"hello"}"#;
        assert_eq!(JsonParser.strip_annotation(input), input);
    }

    // --- YAML: strip_annotation ---

    #[test]
    fn yaml_strip_modeline() {
        let input = "# yaml-language-server: $schema=https://old.com/s.json\nname: hello\n";
        assert_eq!(YamlParser.strip_annotation(input), "name: hello\n");
    }

    #[test]
    fn yaml_strip_modeline_preserves_other_comments() {
        let input =
            "# yaml-language-server: $schema=https://old.com/s.json\n# other\nname: hello\n";
        assert_eq!(YamlParser.strip_annotation(input), "# other\nname: hello\n");
    }

    #[test]
    fn yaml_strip_no_modeline() {
        let input = "name: hello\n";
        assert_eq!(YamlParser.strip_annotation(input), input);
    }

    // --- TOML: strip_annotation ---

    #[test]
    fn toml_strip_schema_comment() {
        let input = "# :schema https://old.com/s.json\nname = \"hello\"\n";
        assert_eq!(TomlParser.strip_annotation(input), "name = \"hello\"\n");
    }

    #[test]
    fn toml_strip_legacy_schema_comment() {
        let input = "# $schema: https://old.com/s.json\nname = \"hello\"\n";
        assert_eq!(TomlParser.strip_annotation(input), "name = \"hello\"\n");
    }

    #[test]
    fn toml_strip_preserves_other_comments() {
        let input = "# :schema https://old.com/s.json\n# other\nname = \"hello\"\n";
        assert_eq!(
            TomlParser.strip_annotation(input),
            "# other\nname = \"hello\"\n"
        );
    }

    #[test]
    fn toml_strip_no_schema() {
        let input = "name = \"hello\"\n";
        assert_eq!(TomlParser.strip_annotation(input), input);
    }

    // --- Update round trips: strip the old annotation, then annotate with the new URL ---

    #[test]
    fn json_update_round_trip() {
        let original = "{\n \"$schema\": \"https://old.com/s.json\",\n \"name\": \"hello\"\n}\n";
        let stripped = JsonParser.strip_annotation(original);
        let updated = JsonParser
            .annotate(&stripped, "https://new.com/s.json")
            .expect("annotate failed");
        assert_eq!(
            updated,
            "{\n \"$schema\": \"https://new.com/s.json\",\n \"name\": \"hello\"\n}\n"
        );
    }

    #[test]
    fn yaml_update_round_trip() {
        let original = "# yaml-language-server: $schema=https://old.com/s.json\nname: hello\n";
        let stripped = YamlParser.strip_annotation(original);
        let updated = YamlParser
            .annotate(&stripped, "https://new.com/s.json")
            .expect("annotate failed");
        assert_eq!(
            updated,
            "# yaml-language-server: $schema=https://new.com/s.json\nname: hello\n"
        );
    }

    #[test]
    fn toml_update_round_trip() {
        let original = "# :schema https://old.com/s.json\nname = \"hello\"\n";
        let stripped = TomlParser.strip_annotation(original);
        let updated = TomlParser
            .annotate(&stripped, "https://new.com/s.json")
            .expect("annotate failed");
        assert_eq!(
            updated,
            "# :schema https://new.com/s.json\nname = \"hello\"\n"
        );
    }
}