lintel_validate/parsers/
markdown.rs1use serde_json::Value;
2
3use crate::diagnostics::ParseDiagnostic;
4
5use super::Parser;
6
/// Parser for Markdown documents whose metadata lives in a leading frontmatter block.
///
/// The type is a stateless unit struct; all behavior is provided by its `Parser`
/// implementation. The common traits are derived so the parser can be logged, copied and
/// default-constructed like any other plain value.
#[derive(Debug, Clone, Copy, Default)]
pub struct MarkdownParser;
8
/// Skips leading whitespace and any run of complete `<!-- ... -->` comments.
///
/// Returns the remaining text together with the number of bytes of `content` that were
/// skipped, i.e. the byte offset of the returned slice inside `content`. An unterminated
/// comment is left in place and ends the scan.
fn skip_html_comments(content: &str) -> (&str, usize) {
    let mut rest = content.trim_start();

    loop {
        if !rest.starts_with("<!--") {
            break;
        }
        match rest.find("-->") {
            // Continue after the terminator and any whitespace that follows it.
            Some(close) => rest = rest[close + 3..].trim_start(),
            None => break,
        }
    }

    // `rest` is always a suffix of `content`, so the length difference is the skipped prefix.
    (rest, content.len() - rest.len())
}
29
30fn extract_yaml_frontmatter(content: &str) -> Option<(&str, usize)> {
32 let (trimmed, offset) = skip_html_comments(content);
33
34 if !trimmed.starts_with("---") {
35 return None;
36 }
37
38 let after_open = &trimmed[3..];
39 let after_newline = after_open
41 .strip_prefix('\n')
42 .or_else(|| after_open.strip_prefix("\r\n"))?;
43
44 let front_start = offset + 3 + (after_open.len() - after_newline.len());
45
46 let closing = after_newline.find("\n---")?;
48 let frontmatter = &after_newline[..closing];
49
50 Some((frontmatter, front_start))
51}
52
53fn extract_toml_frontmatter(content: &str) -> Option<(&str, usize)> {
55 let (trimmed, offset) = skip_html_comments(content);
56
57 if !trimmed.starts_with("+++") {
58 return None;
59 }
60
61 let after_open = &trimmed[3..];
62 let after_newline = after_open
63 .strip_prefix('\n')
64 .or_else(|| after_open.strip_prefix("\r\n"))?;
65
66 let front_start = offset + 3 + (after_open.len() - after_newline.len());
67
68 let closing = after_newline.find("\n+++")?;
69 let frontmatter = &after_newline[..closing];
70
71 Some((frontmatter, front_start))
72}
73
74impl Parser for MarkdownParser {
75 fn parse(&self, content: &str, file_name: &str) -> Result<Value, ParseDiagnostic> {
76 if let Some((frontmatter, offset)) = extract_yaml_frontmatter(content) {
78 return serde_yaml::from_str(frontmatter).map_err(|e| {
79 let span = e.location().map_or(offset, |loc| offset + loc.index());
80 ParseDiagnostic {
81 src: miette::NamedSource::new(file_name, content.to_string()),
82 span: span.into(),
83 message: format!("YAML frontmatter: {e}"),
84 }
85 });
86 }
87
88 if let Some((frontmatter, offset)) = extract_toml_frontmatter(content) {
90 let toml_value: toml::Value = toml::from_str(frontmatter).map_err(|e| {
91 let span = e.span().map_or(offset, |s| offset + s.start);
92 ParseDiagnostic {
93 src: miette::NamedSource::new(file_name, content.to_string()),
94 span: span.into(),
95 message: format!("TOML frontmatter: {e}"),
96 }
97 })?;
98 return serde_json::to_value(toml_value).map_err(|e| ParseDiagnostic {
99 src: miette::NamedSource::new(file_name, content.to_string()),
100 span: offset.into(),
101 message: format!("TOML frontmatter conversion: {e}"),
102 });
103 }
104
105 Ok(Value::Null)
107 }
108
109 fn extract_schema_uri(&self, content: &str, value: &Value) -> Option<String> {
110 if let Some(uri) = value.get("$schema").and_then(Value::as_str) {
112 return Some(uri.to_string());
113 }
114
115 for line in content.lines() {
118 let trimmed = line.trim();
119 if trimmed.is_empty() {
120 continue;
121 }
122 if let Some(rest) = trimmed.strip_prefix("<!--") {
123 let rest = rest.trim();
124 if let Some(rest) = rest.strip_prefix("$schema:") {
125 let rest = rest.trim().trim_end_matches("-->").trim();
126 if !rest.is_empty() {
127 return Some(rest.to_string());
128 }
129 }
130 }
131 if trimmed == "---" || trimmed == "+++" {
133 break;
134 }
135 }
136
137 None
138 }
139}
140
// Unit tests for `MarkdownParser`: frontmatter parsing (YAML and TOML, with and without
// leading HTML comments) and schema URI discovery.
#[cfg(test)]
mod tests {
    use super::*;

    // A `---` delimited block at the top of the document is parsed as YAML.
    #[test]
    fn parse_yaml_frontmatter() -> anyhow::Result<()> {
        let content = "---\nname: test\ndescription: hello\n---\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "test");
        assert_eq!(val["description"], "hello");
        Ok(())
    }

    // A `+++` delimited block at the top of the document is parsed as TOML.
    #[test]
    fn parse_toml_frontmatter() -> anyhow::Result<()> {
        let content = "+++\nname = \"test\"\n+++\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "test");
        Ok(())
    }

    // A document without any frontmatter parses successfully to JSON null.
    #[test]
    fn no_frontmatter_returns_null() -> anyhow::Result<()> {
        let content = "# Just a heading\nSome text\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert!(val.is_null());
        Ok(())
    }

    // The `$schema` key of the parsed value is returned, regardless of the raw content.
    #[test]
    fn extract_schema_from_frontmatter_value() {
        let val = serde_json::json!({"$schema": "https://example.com/s.json", "name": "test"});
        let uri = MarkdownParser.extract_schema_uri("---\n$schema: ...\n---\n", &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    // Without a `$schema` key in the value, a leading `<!-- $schema: ... -->` comment is used.
    #[test]
    fn extract_schema_from_html_comment() {
        let content = "<!-- $schema: https://example.com/s.json -->\n---\nname: test\n---\n";
        let val = serde_json::json!({"name": "test"});
        let uri = MarkdownParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
    }

    // An HTML comment before the opening `---` does not prevent YAML frontmatter detection.
    #[test]
    fn yaml_frontmatter_with_leading_html_comment() -> anyhow::Result<()> {
        let content =
            "<!-- $schema: https://example.com/s.json -->\n---\nname: test\n---\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "test");
        Ok(())
    }

    // An HTML comment before the opening `+++` does not prevent TOML frontmatter detection.
    #[test]
    fn toml_frontmatter_with_leading_html_comment() -> anyhow::Result<()> {
        let content =
            "<!-- $schema: https://example.com/s.json -->\n+++\nname = \"test\"\n+++\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "test");
        Ok(())
    }

    // End-to-end: the same document yields both its frontmatter and the comment's schema URI.
    #[test]
    fn html_comment_schema_plus_yaml_frontmatter() -> anyhow::Result<()> {
        let content =
            "<!-- $schema: https://example.com/s.json -->\n---\nname: researcher\n---\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "researcher");
        let uri = MarkdownParser.extract_schema_uri(content, &val);
        assert_eq!(uri.as_deref(), Some("https://example.com/s.json"));
        Ok(())
    }

    // Several consecutive HTML comments are all skipped before the frontmatter.
    #[test]
    fn multiple_html_comments_before_frontmatter() -> anyhow::Result<()> {
        let content = "<!-- comment 1 -->\n<!-- comment 2 -->\n---\nname: test\n---\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "test");
        Ok(())
    }

    // Nested YAML structures (here a sequence) survive the conversion to a JSON value.
    #[test]
    fn yaml_frontmatter_with_complex_values() -> anyhow::Result<()> {
        let content = "---\nname: my-skill\nallowed-tools:\n  - Bash\n  - Read\n---\n# Body\n";
        let val = MarkdownParser.parse(content, "test.md")?;
        assert_eq!(val["name"], "my-skill");
        assert!(val["allowed-tools"].is_array());
        Ok(())
    }
}