Skip to main content

lintel_validate/parsers/
mod.rs

1mod json;
2mod json5;
3mod jsonc;
4mod markdown;
5mod toml_parser;
6mod yaml;
7
8use std::path::Path;
9
10use serde_json::Value;
11
12use crate::diagnostics::ParseDiagnostic;
13
14pub use self::json::JsonParser;
15pub use self::json5::Json5Parser;
16pub use self::jsonc::JsoncParser;
17pub use self::markdown::MarkdownParser;
18pub use self::toml_parser::TomlParser;
19pub use self::yaml::YamlParser;
20
/// The document formats this module knows how to parse.
///
/// [`detect_format`] maps file extensions to a variant and [`parser_for`]
/// maps a variant to its parser implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileFormat {
    /// Strict JSON. [`detect_format`] never yields this variant: `.json`
    /// files are mapped to [`FileFormat::Jsonc`] instead.
    Json,
    /// JSON5 (`.json5`).
    Json5,
    /// JSON with comments (`.json` and `.jsonc`).
    Jsonc,
    /// TOML (`.toml`).
    Toml,
    /// YAML (`.yaml` and `.yml`).
    Yaml,
    /// Markdown (`.md` and `.mdx`).
    Markdown,
}
30
31/// Parse file content into a `serde_json::Value`.
32///
33/// Implementations must produce a [`ParseDiagnostic`] with an accurate source
34/// span when parsing fails.
35pub trait Parser {
36    /// # Errors
37    ///
38    /// Returns a [`ParseDiagnostic`] with an accurate source span when parsing fails.
39    fn parse(&self, content: &str, file_name: &str) -> Result<Value, ParseDiagnostic>;
40
41    /// Extract the `$schema` URI from file content and/or parsed value.
42    ///
43    /// The default implementation reads `value["$schema"]`, which works for
44    /// JSON, JSON5, and JSONC. YAML and TOML override this to handle their
45    /// format-specific conventions (modeline comments, etc.).
46    fn extract_schema_uri(&self, _content: &str, value: &Value) -> Option<String> {
47        value
48            .get("$schema")
49            .and_then(Value::as_str)
50            .map(String::from)
51    }
52
53    /// Insert a schema annotation into the file content.
54    ///
55    /// Returns `Some(annotated_content)` if the format supports inline schema
56    /// annotations, or `None` if it does not (e.g. Markdown).
57    fn annotate(&self, _content: &str, _schema_url: &str) -> Option<String> {
58        None
59    }
60
61    /// Remove an existing schema annotation from the file content.
62    ///
63    /// Returns the content with the annotation stripped. If no annotation is
64    /// found, returns the content unchanged.
65    fn strip_annotation(&self, content: &str) -> String {
66        content.to_string()
67    }
68}
69
70/// Detect file format from extension. Returns `None` for unrecognized extensions.
71pub fn detect_format(path: &Path) -> Option<FileFormat> {
72    match path.extension().and_then(|e| e.to_str()) {
73        Some("yaml" | "yml") => Some(FileFormat::Yaml),
74        Some("json5") => Some(FileFormat::Json5),
75        Some("json" | "jsonc") => Some(FileFormat::Jsonc),
76        Some("toml") => Some(FileFormat::Toml),
77        Some("md" | "mdx") => Some(FileFormat::Markdown),
78        _ => None,
79    }
80}
81
82/// Return a boxed parser for the given format.
83pub fn parser_for(format: FileFormat) -> Box<dyn Parser> {
84    match format {
85        FileFormat::Json => Box::new(JsonParser),
86        FileFormat::Json5 => Box::new(Json5Parser),
87        FileFormat::Jsonc => Box::new(JsoncParser),
88        FileFormat::Toml => Box::new(TomlParser),
89        FileFormat::Yaml => Box::new(YamlParser),
90        FileFormat::Markdown => Box::new(MarkdownParser),
91    }
92}
93
/// Insert `"$schema": "URL"` as the first property after `{` in a JSON object.
///
/// Uses string manipulation (not parse+reserialize) to preserve formatting:
///
/// * The property is inserted right after the first `{` found in `content`.
///   If there is no `{`, the content is returned unchanged.
/// * If a newline separates the brace from the first token, the document is
///   treated as pretty-printed and the property goes on its own line, using
///   the indentation of the first existing content line.
/// * Otherwise the compact form `"$schema":"URL",` is inserted.
/// * For an empty object (`{}` or `{` + whitespace + `}`) no trailing comma
///   is emitted, so the result remains valid strict JSON. In the
///   pretty-printed case the property is indented two spaces deeper than the
///   closing brace.
///
/// `schema_url` is inserted verbatim; it is not JSON-escaped.
pub(crate) fn annotate_json_content(content: &str, schema_url: &str) -> String {
    let Some(brace_pos) = content.find('{') else {
        return content.to_string();
    };

    let head = &content[..brace_pos];
    let after_brace = &content[brace_pos + 1..];

    // Byte index (within `after_brace`) of the first non-whitespace character.
    let first_token = after_brace.find(|c: char| !c.is_ascii_whitespace());
    let leading_ws = &after_brace[..first_token.unwrap_or(0)];

    // An object with no properties must not get a comma after `$schema`,
    // otherwise the output would be `{"$schema":"…",}`.
    let is_empty_object = first_token.is_some_and(|idx| after_brace[idx..].starts_with('}'));
    let separator = if is_empty_object { "" } else { "," };

    if leading_ws.contains('\n') {
        // Pretty-printed: a newline precedes the first token.
        let indent = if is_empty_object {
            // The only line available is the one holding `}`; properties sit
            // one level deeper than the closing brace.
            format!("{}  ", detect_json_indent(after_brace))
        } else {
            detect_json_indent(after_brace)
        };
        format!("{head}{{\n{indent}\"$schema\": \"{schema_url}\"{separator}{after_brace}")
    } else {
        // Compact: glue the property directly onto the first token.
        let rest = after_brace.trim_start();
        format!("{head}{{\"$schema\":\"{schema_url}\"{separator}{rest}")
    }
}

/// Detect the indentation used in a JSON string (the whitespace at the start
/// of the first content line after the opening brace).
///
/// Lines that are empty or whitespace-only are skipped. Falls back to two
/// spaces when there is no content line at all.
fn detect_json_indent(after_brace: &str) -> String {
    after_brace
        .lines()
        .find(|line| !line.trim().is_empty())
        .map_or_else(
            || "  ".to_string(),
            |line| {
                let indent_len = line.len() - line.trim_start().len();
                line[..indent_len].to_string()
            },
        )
}
139
/// Remove the top-level `"$schema"` property from a JSON string.
///
/// Uses string manipulation (not parse+reserialize) to preserve formatting.
/// The function works on the first textual occurrence of `"$schema"` and is
/// not structure-aware. The content is returned unchanged when:
///
/// * `"$schema"` does not occur,
/// * it is not followed (after optional spaces/tabs) by `:` and a string
///   value, or
/// * the string value is never terminated.
///
/// Separators are cleaned up along with the property:
///
/// * a comma following the value is removed with it;
/// * if there is none but a comma precedes the key, that comma is removed
///   instead (the property was the last one);
/// * if the property is the first thing on its line, the line break before
///   it (`\n` or `\r\n`) and the indentation are removed as well. Anything
///   else that precedes the key on the same line is left untouched.
pub(crate) fn strip_json_schema_property(content: &str) -> String {
    const KEY: &str = "\"$schema\"";

    // Index of the first byte at or after `from` that is not a space or tab.
    fn skip_blanks(bytes: &[u8], from: usize) -> usize {
        from + bytes[from..]
            .iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    }

    let bytes = content.as_bytes();
    let Some(key_start) = content.find(KEY) else {
        return content.to_string();
    };

    // `"$schema"` <blanks> `:` <blanks> `"`
    let colon_pos = skip_blanks(bytes, key_start + KEY.len());
    if bytes.get(colon_pos) != Some(&b':') {
        return content.to_string();
    }
    let quote_pos = skip_blanks(bytes, colon_pos + 1);
    if bytes.get(quote_pos) != Some(&b'"') {
        return content.to_string();
    }

    // Scan the string value up to its closing quote, stepping over backslash
    // escapes. `get` keeps the scan in bounds even when the input ends right
    // after a backslash.
    let mut pos = quote_pos + 1;
    let mut value_end = None;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'\\' => pos += 2,
            b'"' => {
                value_end = Some(pos + 1);
                break;
            }
            _ => pos += 1,
        }
    }
    // Unterminated string: malformed input, leave it alone rather than
    // deleting everything up to the end of the file.
    let Some(value_end) = value_end else {
        return content.to_string();
    };

    let before = &content[..key_start];

    // When only indentation separates the key from the preceding line break,
    // the property owns its line and the cut starts at that line break
    // (including the `\r` of a CRLF pair).
    let own_line_cut = before
        .rfind('\n')
        .filter(|&nl| before[nl + 1..].bytes().all(|b| b == b' ' || b == b'\t'))
        .map(|nl| if before[..nl].ends_with('\r') { nl - 1 } else { nl });

    let comma_pos = skip_blanks(bytes, value_end);
    if bytes.get(comma_pos) == Some(&b',') {
        let remove_end = comma_pos + 1; // past the comma
        return match own_line_cut {
            // Pretty-printed: drop the whole line up to and including the comma.
            Some(cut) => format!("{}{}", &content[..cut], &content[remove_end..]),
            // Inline: drop key, value, comma and the blanks after the comma.
            None => {
                let resume = skip_blanks(bytes, remove_end);
                format!("{}{}", before, &content[resume..])
            }
        };
    }

    // No trailing comma: `$schema` is the only or the last property.
    let tail = &content[value_end..];
    if let Some(without_comma) = before.trim_end().strip_suffix(',') {
        // Last property: also remove the comma that preceded it.
        format!("{without_comma}{tail}")
    } else if let Some(cut) = own_line_cut {
        // Only property, on its own line.
        format!("{}{}", &content[..cut], tail)
    } else {
        // Only property, inline.
        format!("{before}{tail}")
    }
}
227
228/// Convert 1-based line and column to a byte offset in content.
229pub fn line_col_to_offset(content: &str, line: usize, col: usize) -> usize {
230    let mut offset = 0;
231    for (i, l) in content.lines().enumerate() {
232        if i + 1 == line {
233            return offset + col.saturating_sub(1);
234        }
235        offset += l.len() + 1; // +1 for newline
236    }
237    offset.min(content.len())
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema URL used by most of the extraction tests.
    const SCHEMA: &str = "https://example.com/s.json";

    /// A parsed document without any `$schema` property.
    fn plain_doc() -> Value {
        serde_json::json!({"key": "value"})
    }

    /// A parsed document whose only property is `$schema`.
    fn schema_doc() -> Value {
        serde_json::json!({"$schema": "https://example.com/s.json"})
    }

    /// Parse `content` with the parser registered for `format` and check that
    /// the result is `{"key": "value"}`.
    fn assert_parses_to_plain_doc(
        format: FileFormat,
        content: &str,
        file_name: &str,
    ) -> anyhow::Result<()> {
        let parsed = parser_for(format).parse(content, file_name)?;
        assert_eq!(parsed, serde_json::json!({"key": "value"}));
        Ok(())
    }

    // --- detect_format ---

    #[test]
    fn detect_format_json() {
        let detected = detect_format(Path::new("foo.json"));
        assert_eq!(detected, Some(FileFormat::Jsonc));
    }

    #[test]
    fn detect_format_yaml() {
        for name in ["foo.yaml", "foo.yml"] {
            assert_eq!(detect_format(Path::new(name)), Some(FileFormat::Yaml));
        }
    }

    #[test]
    fn detect_format_json5() {
        let detected = detect_format(Path::new("foo.json5"));
        assert_eq!(detected, Some(FileFormat::Json5));
    }

    #[test]
    fn detect_format_jsonc() {
        let detected = detect_format(Path::new("foo.jsonc"));
        assert_eq!(detected, Some(FileFormat::Jsonc));
    }

    #[test]
    fn detect_format_toml() {
        let detected = detect_format(Path::new("foo.toml"));
        assert_eq!(detected, Some(FileFormat::Toml));
    }

    #[test]
    fn detect_format_unknown_returns_none() {
        for name in ["foo.txt", "foo", "devenv.nix"] {
            assert_eq!(detect_format(Path::new(name)), None);
        }
    }

    // --- extract_schema_uri via trait ---

    #[test]
    fn extract_schema_json_with_schema() {
        let found = JsonParser.extract_schema_uri("", &schema_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_json_without_schema() {
        let found = JsonParser.extract_schema_uri("", &plain_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_json5_with_schema() {
        let found = Json5Parser.extract_schema_uri("", &schema_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_jsonc_with_schema() {
        let found = JsoncParser.extract_schema_uri("", &schema_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline() {
        let text = "# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_with_leading_blank_lines() {
        let text = "\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_after_other_comment() {
        let text = "# some comment\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let found = YamlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_not_in_body() {
        let text = "key: value\n# yaml-language-server: $schema=https://example.com/s.json\n";
        let found = YamlParser.extract_schema_uri(text, &plain_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_yaml_top_level_property() {
        let text = "$schema: https://example.com/s.json\nkey: value\n";
        let doc = serde_json::json!({"$schema": "https://example.com/s.json", "key": "value"});
        let found = YamlParser.extract_schema_uri(text, &doc);
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_yaml_modeline_takes_priority() {
        let text = "# yaml-language-server: $schema=https://modeline.com/s.json\n$schema: https://property.com/s.json\n";
        let doc = serde_json::json!({"$schema": "https://property.com/s.json"});
        let found = YamlParser.extract_schema_uri(text, &doc);
        assert_eq!(found.as_deref(), Some("https://modeline.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_none() {
        let found = YamlParser.extract_schema_uri("key: value\n", &plain_doc());
        assert!(found.is_none());
    }

    // --- TOML schema extraction ---

    #[test]
    fn extract_schema_toml_comment() {
        let text = "# :schema https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_toml_with_leading_blank_lines() {
        let text = "\n# :schema https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    #[test]
    fn extract_schema_toml_not_in_body() {
        let text = "key = \"value\"\n# :schema https://example.com/s.json\n";
        let found = TomlParser.extract_schema_uri(text, &plain_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_toml_none() {
        let found = TomlParser.extract_schema_uri("key = \"value\"\n", &plain_doc());
        assert!(found.is_none());
    }

    #[test]
    fn extract_schema_toml_legacy_dollar_schema() {
        let text = "# $schema: https://example.com/s.json\nkey = \"value\"\n";
        let found = TomlParser.extract_schema_uri(text, &plain_doc());
        assert_eq!(found.as_deref(), Some(SCHEMA));
    }

    // --- line_col_to_offset ---

    #[test]
    fn line_col_to_offset_first_line() {
        let text = "hello\nworld";
        assert_eq!(line_col_to_offset(text, 1, 1), 0);
        assert_eq!(line_col_to_offset(text, 1, 3), 2);
    }

    #[test]
    fn line_col_to_offset_second_line() {
        let text = "hello\nworld";
        assert_eq!(line_col_to_offset(text, 2, 1), 6);
        assert_eq!(line_col_to_offset(text, 2, 3), 8);
    }

    // --- parser_for round-trip ---

    #[test]
    fn parser_for_json_parses() -> anyhow::Result<()> {
        assert_parses_to_plain_doc(FileFormat::Json, r#"{"key":"value"}"#, "test.json")
    }

    #[test]
    fn parser_for_yaml_parses() -> anyhow::Result<()> {
        assert_parses_to_plain_doc(FileFormat::Yaml, "key: value\n", "test.yaml")
    }

    #[test]
    fn parser_for_json5_parses() -> anyhow::Result<()> {
        assert_parses_to_plain_doc(FileFormat::Json5, r#"{key: "value"}"#, "test.json5")
    }

    #[test]
    fn parser_for_jsonc_parses() -> anyhow::Result<()> {
        assert_parses_to_plain_doc(
            FileFormat::Jsonc,
            r#"{"key": "value" /* comment */}"#,
            "test.jsonc",
        )
    }

    #[test]
    fn parser_for_toml_parses() -> anyhow::Result<()> {
        assert_parses_to_plain_doc(FileFormat::Toml, "key = \"value\"\n", "test.toml")
    }
}