1mod json;
2mod json5;
3mod jsonc;
4pub mod jsonl;
5mod markdown;
6mod toml_parser;
7mod yaml;
8
9use std::path::Path;
10
11use schema_catalog::FileFormat;
12use serde_json::Value;
13
14use crate::diagnostics::ParseDiagnostic;
15
16pub use self::json::JsonParser;
17pub use self::json5::Json5Parser;
18pub use self::jsonc::JsoncParser;
19pub use self::jsonl::JsonlParser;
20pub use self::markdown::MarkdownParser;
21pub use self::toml_parser::TomlParser;
22pub use self::yaml::YamlParser;
23
24pub trait Parser {
29 fn parse(&self, content: &str, file_name: &str) -> Result<Value, ParseDiagnostic>;
33
34 fn extract_schema_uri(&self, _content: &str, value: &Value) -> Option<String> {
40 value
41 .get("$schema")
42 .and_then(Value::as_str)
43 .map(String::from)
44 }
45
46 fn annotate(&self, _content: &str, _schema_url: &str) -> Option<String> {
51 None
52 }
53
54 fn strip_annotation(&self, content: &str) -> String {
59 content.to_string()
60 }
61}
62
63pub fn detect_format(path: &Path) -> Option<FileFormat> {
65 match path.extension().and_then(|e| e.to_str()) {
66 Some("yaml" | "yml") => Some(FileFormat::Yaml),
67 Some("json5") => Some(FileFormat::Json5),
68 Some("jsonl" | "ndjson") => Some(FileFormat::Jsonl),
69 Some("json" | "jsonc") => Some(FileFormat::Jsonc),
70 Some("toml") => Some(FileFormat::Toml),
71 Some("md" | "mdx") => Some(FileFormat::Markdown),
72 _ => None,
73 }
74}
75
76pub fn parser_for(format: FileFormat) -> Box<dyn Parser> {
78 match format {
79 FileFormat::Json => Box::new(JsonParser),
80 FileFormat::Jsonl => Box::new(JsonlParser),
81 FileFormat::Json5 => Box::new(Json5Parser),
82 FileFormat::Jsonc => Box::new(JsoncParser),
83 FileFormat::Toml => Box::new(TomlParser),
84 FileFormat::Yaml => Box::new(YamlParser),
85 FileFormat::Markdown => Box::new(MarkdownParser),
86 }
87}
88
/// Inserts a `"$schema"` member as the first member of the first JSON object in `content`.
///
/// Everything before the first `{` is preserved verbatim. Two layouts are produced:
///
/// * When a newline separates the opening brace from the first token, the member is
///   placed on its own line, indented as reported by [`detect_json_indent`].
/// * Otherwise it is inserted inline directly after the brace, and the whitespace that
///   followed the brace is dropped.
///
/// A separating comma is emitted only when the object already has content, so an empty
/// object (`{}`) does not end up with a trailing comma, which strict JSON forbids.
///
/// Returns `content` unchanged when it contains no `{`. `schema_url` is inserted as-is;
/// it is not JSON-escaped.
pub(crate) fn annotate_json_content(content: &str, schema_url: &str) -> String {
    let Some(brace_pos) = content.find('{') else {
        return content.to_string();
    };
    let prefix = &content[..brace_pos];
    // `{` is a single byte, so `brace_pos + 1` is always a char boundary.
    let after_brace = &content[brace_pos + 1..];

    let first_token = after_brace.find(|c: char| !c.is_ascii_whitespace());
    let leading_ws = &after_brace[..first_token.unwrap_or(0)];

    // An object whose first token is `}` has no members: no separator is needed.
    let is_empty_object = first_token.is_some_and(|at| after_brace[at..].starts_with('}'));
    let separator = if is_empty_object { "" } else { "," };

    if leading_ws.contains('\n') {
        let indent = detect_json_indent(after_brace);
        format!("{prefix}{{\n{indent}\"$schema\": \"{schema_url}\"{separator}{after_brace}")
    } else {
        let rest = after_brace.trim_start();
        format!("{prefix}{{\"$schema\":\"{schema_url}\"{separator}{rest}")
    }
}

/// Returns the leading whitespace of the first non-blank line in `after_brace`.
///
/// Falls back to a default indent when there is no non-blank line, or when the first
/// non-blank line starts with `}` (the object is empty, so that line's indentation
/// belongs to the closing brace rather than to a member).
fn detect_json_indent(after_brace: &str) -> String {
    let first_content_line = after_brace.lines().find(|line| !line.trim().is_empty());
    match first_content_line {
        Some(line) if !line.trim_start().starts_with('}') => {
            let indent_len = line.len() - line.trim_start().len();
            line[..indent_len].to_string()
        }
        _ => " ".to_string(),
    }
}
134
/// Removes the first `"$schema": "<string>"` member found in `content`, textually.
///
/// The member's separating comma is removed with it: the trailing one when present,
/// otherwise the comma that precedes the member. When the member sits alone on its
/// line (only whitespace between the preceding line break and the key), that line
/// break and indentation are removed too, so no blank line is left behind.
///
/// `content` is returned unchanged when:
///
/// * it contains no `"$schema"` key,
/// * the key is not followed (after optional spaces/tabs) by `:` and a string value, or
/// * the string value is not terminated (including a dangling trailing backslash).
///
/// This is a text-level edit, not a JSON parse: the first occurrence of `"$schema"` is
/// used wherever it appears.
pub(crate) fn strip_json_schema_property(content: &str) -> String {
    /// Returns the index of the first byte at or after `from` that is not a space or tab.
    fn skip_inline_ws(bytes: &[u8], from: usize) -> usize {
        from + bytes[from..]
            .iter()
            .take_while(|&&b| matches!(b, b' ' | b'\t'))
            .count()
    }

    const KEY: &str = "\"$schema\"";
    let bytes = content.as_bytes();

    let Some(key_start) = content.find(KEY) else {
        return content.to_string();
    };

    // Expect `:` and then an opening quote, each optionally preceded by spaces/tabs.
    let colon = skip_inline_ws(bytes, key_start + KEY.len());
    if bytes.get(colon) != Some(&b':') {
        return content.to_string();
    }
    let open_quote = skip_inline_ws(bytes, colon + 1);
    if bytes.get(open_quote) != Some(&b'"') {
        return content.to_string();
    }

    // Scan to the closing quote, stepping over backslash escapes. Running off the end
    // means the string is unterminated; bail out instead of slicing out of bounds.
    let mut pos = open_quote + 1;
    let value_end = loop {
        match bytes.get(pos) {
            Some(b'\\') => pos += 2,
            Some(b'"') => break pos + 1,
            Some(_) => pos += 1,
            None => return content.to_string(),
        }
    };

    let before = &content[..key_start];
    // Where to cut so that the member's whole line disappears. Only valid when nothing
    // but whitespace precedes the key on its line; otherwise cutting at the line break
    // would delete sibling members. A `\r` belonging to a CRLF break is removed as well.
    let line_cut = before
        .rfind('\n')
        .filter(|&nl| before[nl + 1..].trim().is_empty())
        .map(|nl| if before[..nl].ends_with('\r') { nl - 1 } else { nl });

    let after_value = skip_inline_ws(bytes, value_end);
    if bytes.get(after_value) == Some(&b',') {
        // The member is followed by a comma: drop the member together with that comma.
        let remove_end = after_value + 1;
        match line_cut {
            Some(cut) => format!("{}{}", &content[..cut], &content[remove_end..]),
            None => {
                // Inline member: also swallow the blanks that followed the comma.
                let resume = skip_inline_ws(bytes, remove_end);
                format!("{before}{}", &content[resume..])
            }
        }
    } else if let Some(kept) = before.trim_end().strip_suffix(',') {
        // Last member of the object: drop the comma that precedes it instead.
        format!("{kept}{}", &content[value_end..])
    } else if let Some(cut) = line_cut {
        format!("{}{}", &content[..cut], &content[value_end..])
    } else {
        format!("{before}{}", &content[value_end..])
    }
}
222
/// Converts a 1-based `line` / 1-based `col` position into a byte offset into `content`.
///
/// Lines are delimited by `\n`; a preceding `\r` (CRLF) is counted as part of the line
/// it terminates, so offsets stay correct for files with Windows line endings. `col` is
/// measured in bytes from the start of the line, and a `col` of `0` is treated like `1`.
///
/// The result never exceeds `content.len()`: a `line` past the end of the content (or
/// `line == 0`) yields `content.len()`, and an over-long `col` is clamped likewise.
pub fn line_col_to_offset(content: &str, line: usize, col: usize) -> usize {
    let mut line_start = 0;
    // `split_inclusive` keeps each terminator, so every segment's length is exactly the
    // number of bytes the line occupies, whether it ends in `\n`, `\r\n` or nothing.
    for (index, segment) in content.split_inclusive('\n').enumerate() {
        if index + 1 == line {
            return (line_start + col.saturating_sub(1)).min(content.len());
        }
        line_start += segment.len();
    }
    line_start.min(content.len())
}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema URL used by most of the extraction tests.
    const EXAMPLE_SCHEMA: &str = "https://example.com/s.json";

    /// Runs [`detect_format`] on a bare file name.
    fn format_of(file_name: &str) -> Option<FileFormat> {
        detect_format(Path::new(file_name))
    }

    /// Parsed document that carries no schema reference.
    fn key_value_doc() -> serde_json::Value {
        serde_json::json!({"key": "value"})
    }

    /// Parsed document whose only member is `$schema`.
    fn schema_only_doc() -> serde_json::Value {
        serde_json::json!({"$schema": "https://example.com/s.json"})
    }

    /// Parses `source` with the parser registered for `format` and checks that the
    /// result is `{"key": "value"}`.
    fn assert_parses_key_value(
        format: FileFormat,
        source: &str,
        file_name: &str,
    ) -> anyhow::Result<()> {
        let parsed = parser_for(format).parse(source, file_name)?;
        assert_eq!(parsed, serde_json::json!({"key": "value"}));
        Ok(())
    }

    // --- detect_format ---

    #[test]
    fn detect_format_json() {
        // Plain `.json` is deliberately handled by the JSONC parser.
        assert_eq!(format_of("foo.json"), Some(FileFormat::Jsonc));
    }

    #[test]
    fn detect_format_yaml() {
        for file_name in ["foo.yaml", "foo.yml"] {
            assert_eq!(format_of(file_name), Some(FileFormat::Yaml));
        }
    }

    #[test]
    fn detect_format_json5() {
        assert_eq!(format_of("foo.json5"), Some(FileFormat::Json5));
    }

    #[test]
    fn detect_format_jsonc() {
        assert_eq!(format_of("foo.jsonc"), Some(FileFormat::Jsonc));
    }

    #[test]
    fn detect_format_toml() {
        assert_eq!(format_of("foo.toml"), Some(FileFormat::Toml));
    }

    #[test]
    fn detect_format_jsonl() {
        assert_eq!(format_of("foo.jsonl"), Some(FileFormat::Jsonl));
    }

    #[test]
    fn detect_format_ndjson() {
        assert_eq!(format_of("foo.ndjson"), Some(FileFormat::Jsonl));
    }

    #[test]
    fn detect_format_unknown_returns_none() {
        for file_name in ["foo.txt", "foo", "devenv.nix"] {
            assert_eq!(format_of(file_name), None);
        }
    }

    // --- extract_schema_uri: JSON family ---

    #[test]
    fn extract_schema_json_with_schema() {
        let found = JsonParser.extract_schema_uri("", &schema_only_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_json_without_schema() {
        let found = JsonParser.extract_schema_uri("", &key_value_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_json5_with_schema() {
        let found = Json5Parser.extract_schema_uri("", &schema_only_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_jsonc_with_schema() {
        let found = JsoncParser.extract_schema_uri("", &schema_only_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    // --- extract_schema_uri: YAML ---

    #[test]
    fn extract_schema_yaml_modeline() {
        let source = "# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_with_leading_blank_lines() {
        let source = "\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_after_other_comment() {
        let source = "# some comment\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_not_in_body() {
        let source = "key: value\n# yaml-language-server: $schema=https://example.com/s.json\n";
        let found = YamlParser.extract_schema_uri(source, &key_value_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_yaml_top_level_property() {
        let source = "$schema: https://example.com/s.json\nkey: value\n";
        let parsed = serde_json::json!({"$schema": "https://example.com/s.json", "key": "value"});
        let found = YamlParser.extract_schema_uri(source, &parsed);
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_takes_priority() {
        let source = "# yaml-language-server: $schema=https://modeline.com/s.json\n$schema: https://property.com/s.json\n";
        let parsed = serde_json::json!({"$schema": "https://property.com/s.json"});
        let found = YamlParser.extract_schema_uri(source, &parsed);
        assert_eq!(found.as_deref(), Some("https://modeline.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_none() {
        let found = YamlParser.extract_schema_uri("key: value\n", &key_value_doc());
        assert!(found.is_none());
    }

    // --- extract_schema_uri: TOML ---

    #[test]
    fn extract_schema_toml_comment() {
        let source = "# :schema https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_toml_with_leading_blank_lines() {
        let source = "\n# :schema https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    #[test]
    fn extract_schema_toml_not_in_body() {
        let source = "key = \"value\"\n# :schema https://example.com/s.json\n";
        let found = TomlParser.extract_schema_uri(source, &key_value_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_toml_none() {
        let found = TomlParser.extract_schema_uri("key = \"value\"\n", &key_value_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_toml_legacy_dollar_schema() {
        let source = "# $schema: https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(source, &key_value_doc());
        assert_eq!(found.as_deref(), Some(EXAMPLE_SCHEMA));
    }

    // --- line_col_to_offset ---

    #[test]
    fn line_col_to_offset_first_line() {
        let text = "hello\nworld";
        assert_eq!(line_col_to_offset(text, 1, 1), 0);
        assert_eq!(line_col_to_offset(text, 1, 3), 2);
    }

    #[test]
    fn line_col_to_offset_second_line() {
        let text = "hello\nworld";
        assert_eq!(line_col_to_offset(text, 2, 1), 6);
        assert_eq!(line_col_to_offset(text, 2, 3), 8);
    }

    // --- parser_for ---

    #[test]
    fn parser_for_json_parses() -> anyhow::Result<()> {
        assert_parses_key_value(FileFormat::Json, r#"{"key":"value"}"#, "test.json")
    }

    #[test]
    fn parser_for_yaml_parses() -> anyhow::Result<()> {
        assert_parses_key_value(FileFormat::Yaml, "key: value\n", "test.yaml")
    }

    #[test]
    fn parser_for_json5_parses() -> anyhow::Result<()> {
        assert_parses_key_value(FileFormat::Json5, r#"{key: "value"}"#, "test.json5")
    }

    #[test]
    fn parser_for_jsonc_parses() -> anyhow::Result<()> {
        assert_parses_key_value(
            FileFormat::Jsonc,
            r#"{"key": "value" /* comment */}"#,
            "test.jsonc",
        )
    }

    #[test]
    fn parser_for_toml_parses() -> anyhow::Result<()> {
        assert_parses_key_value(FileFormat::Toml, "key = \"value\"\n", "test.toml")
    }
}