// lintel_validate/parsers/mod.rs

1mod json;
2mod json5;
3mod jsonc;
4mod markdown;
5mod toml_parser;
6mod yaml;
7
8use std::path::Path;
9
10use serde_json::Value;
11
12use crate::diagnostics::ParseDiagnostic;
13
14pub use self::json::JsonParser;
15pub use self::json5::Json5Parser;
16pub use self::jsonc::JsoncParser;
17pub use self::markdown::MarkdownParser;
18pub use self::toml_parser::TomlParser;
19pub use self::yaml::YamlParser;
20
/// The document formats this module can hand to a parser.
///
/// [`detect_format`] maps a file extension to one of these variants, and
/// [`parser_for`] maps a variant to the parser that handles it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileFormat {
    /// Selected for the `json` extension.
    Json,
    /// Selected for the `json5` extension.
    Json5,
    /// Selected for the `jsonc` extension.
    Jsonc,
    /// Selected for the `toml` extension.
    Toml,
    /// Selected for the `yaml` and `yml` extensions.
    Yaml,
    /// Selected for the `md` and `mdx` extensions.
    Markdown,
}
30
31/// Parse file content into a `serde_json::Value`.
32///
33/// Implementations must produce a [`ParseDiagnostic`] with an accurate source
34/// span when parsing fails.
35pub trait Parser {
36    /// # Errors
37    ///
38    /// Returns a [`ParseDiagnostic`] with an accurate source span when parsing fails.
39    fn parse(&self, content: &str, file_name: &str) -> Result<Value, ParseDiagnostic>;
40
41    /// Extract the `$schema` URI from file content and/or parsed value.
42    ///
43    /// The default implementation reads `value["$schema"]`, which works for
44    /// JSON, JSON5, and JSONC. YAML and TOML override this to handle their
45    /// format-specific conventions (modeline comments, etc.).
46    fn extract_schema_uri(&self, _content: &str, value: &Value) -> Option<String> {
47        value
48            .get("$schema")
49            .and_then(Value::as_str)
50            .map(String::from)
51    }
52
53    /// Insert a schema annotation into the file content.
54    ///
55    /// Returns `Some(annotated_content)` if the format supports inline schema
56    /// annotations, or `None` if it does not (e.g. Markdown).
57    fn annotate(&self, _content: &str, _schema_url: &str) -> Option<String> {
58        None
59    }
60
61    /// Remove an existing schema annotation from the file content.
62    ///
63    /// Returns the content with the annotation stripped. If no annotation is
64    /// found, returns the content unchanged.
65    fn strip_annotation(&self, content: &str) -> String {
66        content.to_string()
67    }
68}
69
70/// Detect file format from extension. Returns `None` for unrecognized extensions.
71pub fn detect_format(path: &Path) -> Option<FileFormat> {
72    match path.extension().and_then(|e| e.to_str()) {
73        Some("json") => Some(FileFormat::Json),
74        Some("yaml" | "yml") => Some(FileFormat::Yaml),
75        Some("json5") => Some(FileFormat::Json5),
76        Some("jsonc") => Some(FileFormat::Jsonc),
77        Some("toml") => Some(FileFormat::Toml),
78        Some("md" | "mdx") => Some(FileFormat::Markdown),
79        _ => None,
80    }
81}
82
83/// Return a boxed parser for the given format.
84pub fn parser_for(format: FileFormat) -> Box<dyn Parser> {
85    match format {
86        FileFormat::Json => Box::new(JsonParser),
87        FileFormat::Json5 => Box::new(Json5Parser),
88        FileFormat::Jsonc => Box::new(JsoncParser),
89        FileFormat::Toml => Box::new(TomlParser),
90        FileFormat::Yaml => Box::new(YamlParser),
91        FileFormat::Markdown => Box::new(MarkdownParser),
92    }
93}
94
/// Insert `"$schema": "URL"` as the first property after `{` in a JSON object.
///
/// Uses string manipulation (not parse+reserialize) to preserve formatting:
///
/// * If a newline separates the opening brace from the first token, the
///   property is placed on its own line using the indentation of the first
///   content line.
/// * Otherwise the property is inserted compactly, directly after the brace.
///
/// An empty object (`{}` or `{` + whitespace + `}`) receives the property
/// without a trailing comma so the result stays valid JSON. `schema_url` is
/// escaped as a JSON string before being written.
///
/// The first `{` in `content` is taken to be the start of the object; content
/// without any `{` is returned unchanged.
pub(crate) fn annotate_json_content(content: &str, schema_url: &str) -> String {
    let Some(brace_pos) = content.find('{') else {
        return content.to_string();
    };

    let head = &content[..brace_pos];
    let after_brace = &content[brace_pos + 1..];

    // Whitespace between the brace and the first token decides the layout:
    // a newline in that gap means the object is pretty-printed.
    let first_token = after_brace.find(|c: char| !c.is_ascii_whitespace());
    let gap = &after_brace[..first_token.unwrap_or(0)];
    let pretty = gap.contains('\n');

    // When the first token is the closing brace there is no following
    // property, so no separating comma may be emitted.
    let is_empty_object = first_token.is_some_and(|i| after_brace[i..].starts_with('}'));

    let url = escape_json_string(schema_url);

    if pretty {
        if is_empty_object {
            // The only "content line" is the closing brace; indent the new
            // property one level (two spaces) deeper than that brace.
            let closing_indent = detect_json_indent(after_brace);
            format!("{head}{{\n{closing_indent}  \"$schema\": \"{url}\"{after_brace}")
        } else {
            let indent = detect_json_indent(after_brace);
            format!("{head}{{\n{indent}\"$schema\": \"{url}\",{after_brace}")
        }
    } else {
        let rest = after_brace.trim_start();
        let separator = if is_empty_object { "" } else { "," };
        format!("{head}{{\"$schema\":\"{url}\"{separator}{rest}")
    }
}

/// Detect the indentation used in a JSON string (the whitespace at the start
/// of the first non-blank line after the opening brace).
///
/// Falls back to two spaces when every line is blank.
fn detect_json_indent(after_brace: &str) -> String {
    after_brace
        .lines()
        .find(|line| !line.trim().is_empty())
        .map(|line| {
            let indent_len = line.len() - line.trim_start().len();
            line[..indent_len].to_string()
        })
        .unwrap_or_else(|| "  ".to_string())
}

/// Escape `raw` so it can be embedded between double quotes in JSON text.
///
/// Quotes and backslashes get a backslash prefix; control characters below
/// U+0020 are written as `\u00XX`. Everything else is copied verbatim.
fn escape_json_string(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            c if (c as u32) < 0x20 => escaped.push_str(&format!("\\u{:04x}", c as u32)),
            c => escaped.push(c),
        }
    }
    escaped
}
140
/// Remove the `"$schema"` property from a JSON string.
///
/// Uses string manipulation (not parse+reserialize) to preserve formatting.
/// The first textual occurrence of `"$schema"` is treated as the property
/// key; it must be followed (allowing spaces/tabs) by `:` and a double-quoted
/// string value. If that shape is not found — including when the string value
/// is never closed — the content is returned unchanged.
///
/// What gets removed along with the key/value pair:
///
/// * a trailing comma, if the property is followed by one;
/// * otherwise the comma that precedes it, if the property is the last one;
/// * the property's whole line (line break and indentation), but only when
///   nothing except spaces/tabs precedes the key on that line. Text sharing
///   the line with the key is never touched.
pub(crate) fn strip_json_schema_property(content: &str) -> String {
    /// Index of the first byte at or after `from` that is not a space or tab.
    fn skip_blanks(bytes: &[u8], from: usize) -> usize {
        let blanks = bytes[from..]
            .iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count();
        from + blanks
    }

    const KEY: &str = "\"$schema\"";

    let bytes = content.as_bytes();
    let Some(key_start) = content.find(KEY) else {
        return content.to_string();
    };

    // key [blanks] ':' [blanks] '"'
    let colon = skip_blanks(bytes, key_start + KEY.len());
    if bytes.get(colon) != Some(&b':') {
        return content.to_string();
    }
    let open_quote = skip_blanks(bytes, colon + 1);
    if bytes.get(open_quote) != Some(&b'"') {
        return content.to_string();
    }

    // Walk the string value to its closing quote. A backslash skips the
    // escaped byte; `get` keeps the cursor from running past the end, and an
    // unterminated string leaves the content untouched rather than deleting
    // everything after the key.
    let mut pos = open_quote + 1;
    let value_end = loop {
        match bytes.get(pos).copied() {
            Some(b'\\') => pos += 2,
            Some(b'"') => break pos + 1,
            Some(_) => pos += 1,
            None => return content.to_string(),
        }
    };

    let before = &content[..key_start];

    // If only indentation sits between the previous line break and the key,
    // the whole line can go: cut from the line break (including the `\r` of
    // a CRLF pair so no stray carriage return is left behind).
    let line_cut = before.rfind('\n').and_then(|nl| {
        let alone_on_line = before[nl + 1..].bytes().all(|b| b == b' ' || b == b'\t');
        alone_on_line.then(|| if before[..nl].ends_with('\r') { nl - 1 } else { nl })
    });

    // Property followed by a comma: drop the pair and that comma.
    let comma = skip_blanks(bytes, value_end);
    if bytes.get(comma) == Some(&b',') {
        let after_comma = comma + 1;
        return match line_cut {
            Some(cut) => format!("{}{}", &content[..cut], &content[after_comma..]),
            None => {
                // Inline: also swallow the blanks that followed the comma.
                let rest = skip_blanks(bytes, after_comma);
                format!("{before}{}", &content[rest..])
            }
        };
    }

    // No trailing comma: `$schema` is the last (or only) property.
    if let Some(head) = before.trim_end().strip_suffix(',') {
        // Last property: the comma that separated it from its predecessor
        // (and the whitespace after that comma) goes too.
        return format!("{head}{}", &content[value_end..]);
    }
    match line_cut {
        Some(cut) => format!("{}{}", &content[..cut], &content[value_end..]),
        None => format!("{before}{}", &content[value_end..]),
    }
}
228
/// Convert 1-based line and column to a byte offset in content.
///
/// Lines are delimited by `\n`; the terminator (including the `\r` of a
/// `\r\n` pair) is counted in full, so offsets stay correct for CRLF input.
/// `col` is added as a byte count within the line; a `col` of 0 is treated
/// like 1.
///
/// The result never exceeds `content.len()`: a column past the end of the
/// content, or a line number that does not exist (including 0), is clamped
/// to the end of the content.
pub fn line_col_to_offset(content: &str, line: usize, col: usize) -> usize {
    let mut offset = 0;
    // `split_inclusive` keeps each line's terminator, so `raw.len()` is the
    // exact number of bytes to skip to reach the next line.
    for (idx, raw) in content.split_inclusive('\n').enumerate() {
        if idx + 1 == line {
            return (offset + col.saturating_sub(1)).min(content.len());
        }
        offset += raw.len();
    }
    offset.min(content.len())
}
240
/// Unit tests for format detection, schema-URI extraction, the JSON
/// annotate/strip string helpers, offset conversion, and `parser_for`.
#[cfg(test)]
mod tests {
    use super::*;

    const SCHEMA_URL: &str = "https://example.com/s.json";

    // --- detect_format ---

    #[test]
    fn detect_format_json() {
        assert_eq!(detect_format(Path::new("foo.json")), Some(FileFormat::Json));
    }

    #[test]
    fn detect_format_yaml() {
        assert_eq!(detect_format(Path::new("foo.yaml")), Some(FileFormat::Yaml));
        assert_eq!(detect_format(Path::new("foo.yml")), Some(FileFormat::Yaml));
    }

    #[test]
    fn detect_format_json5() {
        assert_eq!(
            detect_format(Path::new("foo.json5")),
            Some(FileFormat::Json5)
        );
    }

    #[test]
    fn detect_format_jsonc() {
        assert_eq!(
            detect_format(Path::new("foo.jsonc")),
            Some(FileFormat::Jsonc)
        );
    }

    #[test]
    fn detect_format_toml() {
        assert_eq!(detect_format(Path::new("foo.toml")), Some(FileFormat::Toml));
    }

    #[test]
    fn detect_format_markdown() {
        assert_eq!(
            detect_format(Path::new("foo.md")),
            Some(FileFormat::Markdown)
        );
        assert_eq!(
            detect_format(Path::new("foo.mdx")),
            Some(FileFormat::Markdown)
        );
    }

    #[test]
    fn detect_format_unknown_returns_none() {
        assert_eq!(detect_format(Path::new("foo.txt")), None);
        assert_eq!(detect_format(Path::new("foo")), None);
        assert_eq!(detect_format(Path::new("devenv.nix")), None);
    }

    // --- extract_schema_uri via trait ---

    #[test]
    fn extract_schema_json_with_schema() {
        let val = serde_json::json!({"$schema": "https://example.com/s.json"});
        let uri = JsonParser.extract_schema_uri("", &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_json_without_schema() {
        let val = serde_json::json!({"key": "value"});
        let uri = JsonParser.extract_schema_uri("", &val);
        assert!(uri.is_none());
    }

    #[test]
    fn extract_schema_json5_with_schema() {
        let val = serde_json::json!({"$schema": "https://example.com/s.json"});
        let uri = Json5Parser.extract_schema_uri("", &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_jsonc_with_schema() {
        let val = serde_json::json!({"$schema": "https://example.com/s.json"});
        let uri = JsoncParser.extract_schema_uri("", &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_modeline() {
        let content = "# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let val = serde_json::json!({"key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_modeline_with_leading_blank_lines() {
        let content = "\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let val = serde_json::json!({"key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_modeline_after_other_comment() {
        let content = "# some comment\n# yaml-language-server: $schema=https://example.com/s.json\nkey: value\n";
        let val = serde_json::json!({"key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_modeline_not_in_body() {
        let content = "key: value\n# yaml-language-server: $schema=https://example.com/s.json\n";
        let val = serde_json::json!({"key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert!(uri.is_none());
    }

    #[test]
    fn extract_schema_yaml_top_level_property() {
        let content = "$schema: https://example.com/s.json\nkey: value\n";
        let val = serde_json::json!({"$schema": "https://example.com/s.json", "key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_modeline_takes_priority() {
        let content = "# yaml-language-server: $schema=https://modeline.com/s.json\n$schema: https://property.com/s.json\n";
        let val = serde_json::json!({"$schema": "https://property.com/s.json"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://modeline.com/s.json"));
    }

    #[test]
    fn extract_schema_yaml_none() {
        let content = "key: value\n";
        let val = serde_json::json!({"key": "value"});
        let uri = YamlParser.extract_schema_uri(content, &val);
        assert!(uri.is_none());
    }

    // --- TOML schema extraction ---

    #[test]
    fn extract_schema_toml_comment() {
        let content = "# :schema https://example.com/s.json\nkey = \"value\"\n";
        let val = serde_json::json!({"key": "value"});
        let uri = TomlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_toml_with_leading_blank_lines() {
        let content = "\n# :schema https://example.com/s.json\nkey = \"value\"\n";
        let val = serde_json::json!({"key": "value"});
        let uri = TomlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    #[test]
    fn extract_schema_toml_not_in_body() {
        let content = "key = \"value\"\n# :schema https://example.com/s.json\n";
        let val = serde_json::json!({"key": "value"});
        let uri = TomlParser.extract_schema_uri(content, &val);
        assert!(uri.is_none());
    }

    #[test]
    fn extract_schema_toml_none() {
        let content = "key = \"value\"\n";
        let val = serde_json::json!({"key": "value"});
        let uri = TomlParser.extract_schema_uri(content, &val);
        assert!(uri.is_none());
    }

    #[test]
    fn extract_schema_toml_legacy_dollar_schema() {
        let content = "# $schema: https://example.com/s.json\nkey = \"value\"\n";
        let val = serde_json::json!({"key": "value"});
        let uri = TomlParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    // --- annotate_json_content ---

    #[test]
    fn annotate_json_compact_object() {
        assert_eq!(
            annotate_json_content(r#"{"key":"value"}"#, SCHEMA_URL),
            r#"{"$schema":"https://example.com/s.json","key":"value"}"#
        );
    }

    #[test]
    fn annotate_json_pretty_object_reuses_indent() {
        assert_eq!(
            annotate_json_content("{\n  \"key\": \"value\"\n}", SCHEMA_URL),
            "{\n  \"$schema\": \"https://example.com/s.json\",\n  \"key\": \"value\"\n}"
        );
    }

    #[test]
    fn annotate_json_without_object_is_unchanged() {
        assert_eq!(annotate_json_content("[1, 2]", SCHEMA_URL), "[1, 2]");
    }

    // --- strip_json_schema_property ---

    #[test]
    fn strip_json_compact_first_property() {
        assert_eq!(
            strip_json_schema_property(
                r#"{"$schema":"https://example.com/s.json","key":"value"}"#
            ),
            r#"{"key":"value"}"#
        );
    }

    #[test]
    fn strip_json_pretty_first_property() {
        assert_eq!(
            strip_json_schema_property(
                "{\n  \"$schema\": \"https://example.com/s.json\",\n  \"key\": \"value\"\n}"
            ),
            "{\n  \"key\": \"value\"\n}"
        );
    }

    #[test]
    fn strip_json_pretty_last_property() {
        assert_eq!(
            strip_json_schema_property(
                "{\n  \"key\": \"value\",\n  \"$schema\": \"https://example.com/s.json\"\n}"
            ),
            "{\n  \"key\": \"value\"\n}"
        );
    }

    #[test]
    fn strip_json_without_schema_is_unchanged() {
        let content = r#"{"key":"value"}"#;
        assert_eq!(strip_json_schema_property(content), content);
    }

    #[test]
    fn annotate_then_strip_round_trips() {
        for original in [r#"{"key":"value"}"#, "{\n  \"key\": \"value\"\n}"] {
            let annotated = annotate_json_content(original, SCHEMA_URL);
            assert_eq!(strip_json_schema_property(&annotated), original);
        }
    }

    // --- line_col_to_offset ---

    #[test]
    fn line_col_to_offset_first_line() {
        assert_eq!(line_col_to_offset("hello\nworld", 1, 1), 0);
        assert_eq!(line_col_to_offset("hello\nworld", 1, 3), 2);
    }

    #[test]
    fn line_col_to_offset_second_line() {
        assert_eq!(line_col_to_offset("hello\nworld", 2, 1), 6);
        assert_eq!(line_col_to_offset("hello\nworld", 2, 3), 8);
    }

    #[test]
    fn line_col_to_offset_past_last_line_clamps_to_end() {
        assert_eq!(line_col_to_offset("hello\nworld", 9, 1), 11);
    }

    // --- parser_for round-trip ---

    #[test]
    fn parser_for_json_parses() -> anyhow::Result<()> {
        let p = parser_for(FileFormat::Json);
        let val = p.parse(r#"{"key":"value"}"#, "test.json")?;
        assert_eq!(val, serde_json::json!({"key": "value"}));
        Ok(())
    }

    #[test]
    fn parser_for_yaml_parses() -> anyhow::Result<()> {
        let p = parser_for(FileFormat::Yaml);
        let val = p.parse("key: value\n", "test.yaml")?;
        assert_eq!(val, serde_json::json!({"key": "value"}));
        Ok(())
    }

    #[test]
    fn parser_for_json5_parses() -> anyhow::Result<()> {
        let p = parser_for(FileFormat::Json5);
        let val = p.parse(r#"{key: "value"}"#, "test.json5")?;
        assert_eq!(val, serde_json::json!({"key": "value"}));
        Ok(())
    }

    #[test]
    fn parser_for_jsonc_parses() -> anyhow::Result<()> {
        let p = parser_for(FileFormat::Jsonc);
        let val = p.parse(r#"{"key": "value" /* comment */}"#, "test.jsonc")?;
        assert_eq!(val, serde_json::json!({"key": "value"}));
        Ok(())
    }

    #[test]
    fn parser_for_toml_parses() -> anyhow::Result<()> {
        let p = parser_for(FileFormat::Toml);
        let val = p.parse("key = \"value\"\n", "test.toml")?;
        assert_eq!(val, serde_json::json!({"key": "value"}));
        Ok(())
    }
}