Skip to main content

lintel_validate/parsers/
markdown.rs

1use serde_json::Value;
2
3use crate::diagnostics::ParseDiagnostic;
4
5use super::Parser;
6
/// Parser for Markdown documents that carry YAML (`---`) or TOML (`+++`)
/// frontmatter.
///
/// The type is a stateless unit struct, so it derives the cheap standard
/// traits to be printable in diagnostics and freely copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct MarkdownParser;
8
/// Strip leading whitespace and any run of HTML comments (`<!-- ... -->`)
/// from the front of `content`.
///
/// Returns the unconsumed tail together with its byte offset inside
/// `content`. An unterminated comment is left in place and ends the scan.
fn skip_html_comments(content: &str) -> (&str, usize) {
    let mut rest = content.trim_start();

    while rest.starts_with("<!--") {
        match rest.find("-->") {
            // Drop the comment (including its terminator) and whatever
            // whitespace follows it.
            Some(close) => rest = rest[close + 3..].trim_start(),
            // Unclosed comment — nothing more can be skipped.
            None => break,
        }
    }

    // `rest` is always a suffix of `content`, so the number of consumed
    // bytes is simply the difference in lengths.
    (rest, content.len() - rest.len())
}
29
30/// Extract YAML frontmatter delimited by `---`.
31fn extract_yaml_frontmatter(content: &str) -> Option<(&str, usize)> {
32    let (trimmed, offset) = skip_html_comments(content);
33
34    if !trimmed.starts_with("---") {
35        return None;
36    }
37
38    let after_open = &trimmed[3..];
39    // The opening --- must be followed by a newline
40    let after_newline = after_open
41        .strip_prefix('\n')
42        .or_else(|| after_open.strip_prefix("\r\n"))?;
43
44    let front_start = offset + 3 + (after_open.len() - after_newline.len());
45
46    // Find closing ---
47    let closing = after_newline.find("\n---")?;
48    let frontmatter = &after_newline[..closing];
49
50    Some((frontmatter, front_start))
51}
52
53/// Extract TOML frontmatter delimited by `+++`.
54fn extract_toml_frontmatter(content: &str) -> Option<(&str, usize)> {
55    let (trimmed, offset) = skip_html_comments(content);
56
57    if !trimmed.starts_with("+++") {
58        return None;
59    }
60
61    let after_open = &trimmed[3..];
62    let after_newline = after_open
63        .strip_prefix('\n')
64        .or_else(|| after_open.strip_prefix("\r\n"))?;
65
66    let front_start = offset + 3 + (after_open.len() - after_newline.len());
67
68    let closing = after_newline.find("\n+++")?;
69    let frontmatter = &after_newline[..closing];
70
71    Some((frontmatter, front_start))
72}
73
74impl Parser for MarkdownParser {
75    fn parse(&self, content: &str, file_name: &str) -> Result<Value, ParseDiagnostic> {
76        // Try YAML frontmatter first (---)
77        if let Some((frontmatter, offset)) = extract_yaml_frontmatter(content) {
78            return serde_yaml::from_str(frontmatter).map_err(|e| {
79                let span = e.location().map_or(offset, |loc| offset + loc.index());
80                ParseDiagnostic {
81                    src: miette::NamedSource::new(file_name, content.to_string()),
82                    span: span.into(),
83                    message: format!("YAML frontmatter: {e}"),
84                }
85            });
86        }
87
88        // Try TOML frontmatter (+++)
89        if let Some((frontmatter, offset)) = extract_toml_frontmatter(content) {
90            let toml_value: toml::Value = toml::from_str(frontmatter).map_err(|e| {
91                let span = e.span().map_or(offset, |s| offset + s.start);
92                ParseDiagnostic {
93                    src: miette::NamedSource::new(file_name, content.to_string()),
94                    span: span.into(),
95                    message: format!("TOML frontmatter: {e}"),
96                }
97            })?;
98            return serde_json::to_value(toml_value).map_err(|e| ParseDiagnostic {
99                src: miette::NamedSource::new(file_name, content.to_string()),
100                span: offset.into(),
101                message: format!("TOML frontmatter conversion: {e}"),
102            });
103        }
104
105        // No frontmatter found — return null so it gets skipped
106        Ok(Value::Null)
107    }
108
109    fn extract_schema_uri(&self, content: &str, value: &Value) -> Option<String> {
110        // Check for $schema in frontmatter value
111        if let Some(uri) = value.get("$schema").and_then(Value::as_str) {
112            return Some(uri.to_string());
113        }
114
115        // Check for schema comment before frontmatter
116        // e.g. <!-- $schema: https://... -->
117        for line in content.lines() {
118            let trimmed = line.trim();
119            if trimmed.is_empty() {
120                continue;
121            }
122            if let Some(rest) = trimmed.strip_prefix("<!--") {
123                let rest = rest.trim();
124                if let Some(rest) = rest.strip_prefix("$schema:") {
125                    let rest = rest.trim().trim_end_matches("-->").trim();
126                    if !rest.is_empty() {
127                        return Some(rest.to_string());
128                    }
129                }
130            }
131            // Stop at frontmatter delimiter
132            if trimmed == "---" || trimmed == "+++" {
133                break;
134            }
135        }
136
137        None
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema URI used by every schema-related test.
    const SCHEMA_URI: &str = "https://example.com/s.json";

    /// Run the parser on `content` under a fixed file name.
    fn parse_md(content: &str) -> anyhow::Result<serde_json::Value> {
        Ok(MarkdownParser.parse(content, "test.md")?)
    }

    /// Assert that `key` in `val` holds exactly the string `expected`.
    fn assert_str(val: &serde_json::Value, key: &str, expected: &str) {
        assert_eq!(val[key], expected);
    }

    #[test]
    fn parse_yaml_frontmatter() -> anyhow::Result<()> {
        let val = parse_md("---\nname: test\ndescription: hello\n---\n# Body\n")?;
        assert_str(&val, "name", "test");
        assert_str(&val, "description", "hello");
        Ok(())
    }

    #[test]
    fn parse_toml_frontmatter() -> anyhow::Result<()> {
        let val = parse_md("+++\nname = \"test\"\n+++\n# Body\n")?;
        assert_str(&val, "name", "test");
        Ok(())
    }

    #[test]
    fn no_frontmatter_returns_null() -> anyhow::Result<()> {
        let val = parse_md("# Just a heading\nSome text\n")?;
        assert!(val.is_null());
        Ok(())
    }

    #[test]
    fn extract_schema_from_frontmatter_value() {
        let val = serde_json::json!({"$schema": "https://example.com/s.json", "name": "test"});
        let uri = MarkdownParser.extract_schema_uri("---\n$schema: ...\n---\n", &val);
        assert_eq!(uri.as_deref(), Some(SCHEMA_URI));
    }

    #[test]
    fn extract_schema_from_html_comment() {
        let content = "<!-- $schema: https://example.com/s.json -->\n---\nname: test\n---\n";
        let val = serde_json::json!({"name": "test"});
        let uri = MarkdownParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some(SCHEMA_URI));
    }

    #[test]
    fn yaml_frontmatter_with_leading_html_comment() -> anyhow::Result<()> {
        let val = parse_md(
            "<!-- $schema: https://example.com/s.json -->\n---\nname: test\n---\n# Body\n",
        )?;
        assert_str(&val, "name", "test");
        Ok(())
    }

    #[test]
    fn toml_frontmatter_with_leading_html_comment() -> anyhow::Result<()> {
        let val = parse_md(
            "<!-- $schema: https://example.com/s.json -->\n+++\nname = \"test\"\n+++\n# Body\n",
        )?;
        assert_str(&val, "name", "test");
        Ok(())
    }

    #[test]
    fn html_comment_schema_plus_yaml_frontmatter() -> anyhow::Result<()> {
        let content =
            "<!-- $schema: https://example.com/s.json -->\n---\nname: researcher\n---\n# Body\n";
        let val = parse_md(content)?;
        assert_str(&val, "name", "researcher");
        let uri = MarkdownParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some(SCHEMA_URI));
        Ok(())
    }

    #[test]
    fn multiple_html_comments_before_frontmatter() -> anyhow::Result<()> {
        let val = parse_md("<!-- comment 1 -->\n<!-- comment 2 -->\n---\nname: test\n---\n")?;
        assert_str(&val, "name", "test");
        Ok(())
    }

    #[test]
    fn yaml_frontmatter_with_complex_values() -> anyhow::Result<()> {
        let val = parse_md("---\nname: my-skill\nallowed-tools:\n  - Bash\n  - Read\n---\n# Body\n")?;
        assert_str(&val, "name", "my-skill");
        assert!(val["allowed-tools"].is_array());
        Ok(())
    }
}