mdlint/markdown/
parser.rs1use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
2use std::collections::HashSet;
3use std::ops::Range;
4
5pub struct MarkdownParser<'a> {
6 content: &'a str,
7 lines: Vec<&'a str>,
8}
9
10impl<'a> MarkdownParser<'a> {
11 pub fn new(content: &'a str) -> Self {
12 let lines = content.lines().collect();
13 Self { content, lines }
14 }
15
16 pub fn content(&self) -> &'a str {
17 self.content
18 }
19
20 pub fn lines(&self) -> &[&'a str] {
21 &self.lines
22 }
23
24 pub fn line_count(&self) -> usize {
25 self.lines.len()
26 }
27
28 pub fn get_line(&self, line_num: usize) -> Option<&'a str> {
29 if line_num > 0 && line_num <= self.lines.len() {
30 Some(self.lines[line_num - 1])
31 } else {
32 None
33 }
34 }
35
36 pub fn parse(&self) -> impl Iterator<Item = Event<'a>> + 'a {
37 Parser::new_ext(self.content, Self::options())
38 }
39
40 pub fn parse_with_offsets(&self) -> impl Iterator<Item = (Event<'a>, Range<usize>)> {
41 Parser::new_ext(self.content, Self::options()).into_offset_iter()
42 }
43
44 fn options() -> Options {
45 let mut options = Options::empty();
46 options.insert(Options::ENABLE_TABLES);
47 options.insert(Options::ENABLE_FOOTNOTES);
48 options.insert(Options::ENABLE_STRIKETHROUGH);
49 options.insert(Options::ENABLE_TASKLISTS);
50 options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
51 options
52 }
53
54 pub fn offset_to_line(&self, offset: usize) -> usize {
55 self.offset_to_position(offset).0
56 }
57
58 pub fn offset_to_position(&self, offset: usize) -> (usize, usize) {
59 let mut current_offset = 0;
60 for (line_num, line) in self.lines.iter().enumerate() {
61 let line_len = line.len() + 1;
62 if offset < current_offset + line_len {
63 let column = offset - current_offset + 1;
64 return (line_num + 1, column);
65 }
66 current_offset += line_len;
67 }
68 (self.lines.len(), 1)
69 }
70
71 pub fn get_code_line_numbers(&self) -> HashSet<usize> {
77 let mut code_lines = HashSet::new();
78 let mut in_code_block = false;
79
80 for (event, range) in self.parse_with_offsets() {
81 match event {
82 Event::Start(Tag::CodeBlock(_)) => {
83 in_code_block = true;
84 let start_line = self.offset_to_line(range.start);
86 let end_line = self.offset_to_line(range.end);
87 for line in start_line..=end_line {
88 code_lines.insert(line);
89 }
90 }
91 Event::End(TagEnd::CodeBlock) => {
92 in_code_block = false;
93 }
94 Event::Code(_) => {
95 let start_line = self.offset_to_line(range.start);
98 let end_line = self.offset_to_line(range.end);
99 for line in start_line..=end_line {
100 code_lines.insert(line);
101 }
102 }
103 _ => {
104 if in_code_block {
106 let start_line = self.offset_to_line(range.start);
107 let end_line = self.offset_to_line(range.end);
108 for line in start_line..=end_line {
109 code_lines.insert(line);
110 }
111 }
112 }
113 }
114 }
115
116 code_lines
117 }
118
119 pub fn get_code_block_line_numbers(&self) -> HashSet<usize> {
124 let mut code_lines = HashSet::new();
125 let mut in_code_block = false;
126
127 for (event, range) in self.parse_with_offsets() {
128 match event {
129 Event::Start(Tag::CodeBlock(_)) => {
130 in_code_block = true;
131 let start_line = self.offset_to_line(range.start);
133 let end_line = self.offset_to_line(range.end);
134 for line in start_line..=end_line {
135 code_lines.insert(line);
136 }
137 }
138 Event::End(TagEnd::CodeBlock) => {
139 in_code_block = false;
140 }
141 _ => {
142 if in_code_block {
144 let start_line = self.offset_to_line(range.start);
145 let end_line = self.offset_to_line(range.end);
146 for line in start_line..=end_line {
147 code_lines.insert(line);
148 }
149 }
150 }
151 }
152 }
153
154 code_lines
155 }
156
157 pub fn get_code_ranges(&self) -> Vec<Range<usize>> {
160 let mut code_ranges = Vec::new();
161 let mut in_code_block = false;
162 let mut code_block_start = 0;
163
164 for (event, range) in self.parse_with_offsets() {
165 match event {
166 Event::Start(Tag::CodeBlock(_)) => {
167 in_code_block = true;
168 code_block_start = range.start;
169 }
170 Event::End(TagEnd::CodeBlock) => {
171 if in_code_block {
172 code_ranges.push(code_block_start..range.end);
173 in_code_block = false;
174 }
175 }
176 Event::Code(_) => {
177 code_ranges.push(range);
179 }
180 _ => {}
181 }
182 }
183
184 code_ranges
185 }
186
187 pub fn line_offset_to_absolute(&self, line_num: usize, byte_offset_in_line: usize) -> usize {
190 let mut current_offset = 0;
191 for (i, line) in self.lines.iter().enumerate() {
192 if i + 1 == line_num {
193 return current_offset + byte_offset_in_line;
194 }
195 current_offset += line.len() + 1; }
197 current_offset
198 }
199
200 pub fn is_heading(&self, event: &Event) -> bool {
201 matches!(event, Event::Start(Tag::Heading { .. }))
202 }
203
204 pub fn is_code_block(&self, event: &Event) -> bool {
205 matches!(event, Event::Start(Tag::CodeBlock(_)))
206 }
207
208 pub fn is_list(&self, event: &Event) -> bool {
209 matches!(event, Event::Start(Tag::List(_)))
210 }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Three short lines, shared by the line and offset lookup tests.
    const THREE_LINES: &str = "Line 1\nLine 2\nLine 3";

    /// The parser exposes the original text and counts its lines.
    #[test]
    fn test_basic_parsing() {
        let source = "# Heading\n\nSome **bold** text.";
        let md = MarkdownParser::new(source);

        assert_eq!(md.content(), source);
        assert_eq!(md.line_count(), 3);
    }

    /// Line lookup is 1-based and rejects numbers outside the document.
    #[test]
    fn test_get_line() {
        let md = MarkdownParser::new(THREE_LINES);

        let expectations = [
            (1, Some("Line 1")),
            (2, Some("Line 2")),
            (3, Some("Line 3")),
            (0, None),
            (4, None),
        ];
        for (line_num, expected) in expectations {
            assert_eq!(md.get_line(line_num), expected);
        }
    }

    /// Byte offsets map to the 1-based line that contains them.
    #[test]
    fn test_offset_to_line() {
        let md = MarkdownParser::new(THREE_LINES);

        for (offset, expected_line) in [(0, 1), (3, 1), (7, 2), (14, 3)] {
            assert_eq!(md.offset_to_line(offset), expected_line);
        }
    }

    /// Byte offsets map to 1-based `(line, column)` pairs.
    #[test]
    fn test_offset_to_position() {
        let md = MarkdownParser::new(THREE_LINES);

        for (offset, expected) in [(0, (1, 1)), (3, (1, 4)), (7, (2, 1))] {
            assert_eq!(md.offset_to_position(offset), expected);
        }
    }

    /// A lone heading produces events, the first of which opens the heading.
    #[test]
    fn test_parse_events() {
        let md = MarkdownParser::new("# Heading");

        let events = md.parse().collect::<Vec<_>>();
        assert!(!events.is_empty());
        assert!(md.is_heading(&events[0]));
    }

    /// Parsing with offsets yields at least one event.
    #[test]
    fn test_parse_with_offsets() {
        let md = MarkdownParser::new("# Heading\n\nParagraph");

        let events = md.parse_with_offsets().collect::<Vec<_>>();
        assert!(!events.is_empty());
    }

    /// Each event classifier recognises its construct in a mixed document.
    #[test]
    fn test_event_type_checks() {
        let md = MarkdownParser::new("# Heading\n\n```rust\ncode\n```\n\n- item");

        let events = md.parse().collect::<Vec<_>>();

        let has_heading = events.iter().any(|event| md.is_heading(event));
        let has_code = events.iter().any(|event| md.is_code_block(event));
        let has_list = events.iter().any(|event| md.is_list(event));

        assert!(has_heading);
        assert!(has_code);
        assert!(has_list);
    }

    /// Every line of a fenced block, fences included, is reported as code.
    #[test]
    fn test_code_line_numbers_fenced() {
        let source = "Normal text\n\n```sql\nSELECT * FROM table_name\nWHERE user_id = 123\n```\n\nMore text";
        let code_lines = MarkdownParser::new(source).get_code_line_numbers();

        let must_be_code = [
            (3, "Line 3 (opening ```) should be code"),
            (4, "Line 4 (code content) should be code"),
            (5, "Line 5 (code content) should be code"),
            (6, "Line 6 (closing ```) should be code"),
        ];
        for (line, message) in must_be_code {
            assert!(code_lines.contains(&line), "{}", message);
        }

        let must_not_be_code = [
            (1, "Line 1 should not be code"),
            (2, "Line 2 should not be code"),
            (8, "Line 8 should not be code"),
        ];
        for (line, message) in must_not_be_code {
            assert!(!code_lines.contains(&line), "{}", message);
        }
    }

    /// A line holding an inline code span is reported as code.
    #[test]
    fn test_code_line_numbers_inline() {
        let source = "This is `inline_code_with_underscores` in text";
        let code_lines = MarkdownParser::new(source).get_code_line_numbers();

        assert!(
            code_lines.contains(&1),
            "Line with inline code should be marked"
        );
    }

    /// Inline spans and fenced blocks are both reported; plain text is not.
    #[test]
    fn test_code_line_numbers_mixed() {
        let source =
            "Normal text\n\nText with `inline_code` here\n\n```\nCode block\n```\n\nFinal text";
        let code_lines = MarkdownParser::new(source).get_code_line_numbers();

        let must_be_marked = [
            (3, "Line with inline code should be marked"),
            (5, "Code block line should be marked"),
            (6, "Code block line should be marked"),
            (7, "Code block line should be marked"),
        ];
        for (line, message) in must_be_marked {
            assert!(code_lines.contains(&line), "{}", message);
        }

        let must_not_be_marked = [
            (1, "Normal text line should not be marked"),
            (2, "Empty line should not be marked"),
            (9, "Normal text line should not be marked"),
        ];
        for (line, message) in must_not_be_marked {
            assert!(!code_lines.contains(&line), "{}", message);
        }
    }
}