1use crate::error::{MdBookLintError, Result};
2use comrak::nodes::{AstNode, NodeValue};
3use comrak::{Arena, ComrakOptions, parse_document};
4use std::path::PathBuf;
5
/// A markdown document held in memory together with the path it is
/// associated with.
///
/// All fields are public; `Document::new` is the constructor that keeps
/// `lines` in sync with `content`.
#[derive(Debug)]
pub struct Document {
    /// Complete text of the document, exactly as supplied by the caller.
    pub content: String,

    /// Path supplied alongside the content (it may be empty).
    pub path: PathBuf,

    /// The individual lines of the document. `Document::new` fills this by
    /// splitting `content` with `str::lines`, so entries carry no `\n` or
    /// `\r\n` terminator.
    pub lines: Vec<String>,
}
16
17impl Document {
18 pub fn new(content: String, path: PathBuf) -> Result<Self> {
20 let lines: Vec<String> = content.lines().map(|s| s.to_owned()).collect();
25
26 Ok(Document {
27 content,
28 path,
29 lines,
30 })
31 }
32
33 pub fn parse_ast<'a>(&self, arena: &'a Arena<AstNode<'a>>) -> &'a AstNode<'a> {
35 let mut options = ComrakOptions::default();
37 options.extension.strikethrough = true;
38 options.extension.tagfilter = false;
39 options.extension.table = true;
40 options.extension.autolink = true;
41 options.extension.tasklist = true;
42 options.extension.superscript = false;
43 options.extension.header_ids = None;
44 options.extension.footnotes = true;
45 options.extension.description_lists = true;
46 options.extension.front_matter_delimiter = Some("---".to_owned());
47 options.parse.smart = false;
48 options.parse.default_info_string = None;
49 options.parse.relaxed_tasklist_matching = false;
50 options.parse.relaxed_autolinks = false;
51
52 parse_document(arena, &self.content, &options)
53 }
54
55 pub fn parse_ast_with_context<'a>(
57 &self,
58 arena: &'a Arena<AstNode<'a>>,
59 ) -> Result<&'a AstNode<'a>> {
60 let ast = self.parse_ast(arena);
62
63 if ast.children().count() == 0 && !self.content.trim().is_empty() {
65 return Err(MdBookLintError::Document(
66 "Failed to parse document AST - no content nodes found".to_string(),
67 ));
68 }
69
70 Ok(ast)
71 }
72
73 pub fn line_number_at_offset(&self, offset: usize) -> usize {
75 let mut current_offset = 0;
76 for (line_idx, line) in self.lines.iter().enumerate() {
77 if current_offset + line.len() >= offset {
78 return line_idx + 1; }
80 current_offset += line.len() + 1; }
82 self.lines.len() }
84
85 pub fn column_number_at_offset(&self, offset: usize) -> usize {
87 let mut current_offset = 0;
88 for line in &self.lines {
89 if current_offset + line.len() >= offset {
90 return offset - current_offset + 1; }
92 current_offset += line.len() + 1; }
94 1 }
96
97 pub fn headings<'a>(&self, ast: &'a AstNode<'a>) -> Vec<&'a AstNode<'a>> {
99 let mut headings = Vec::new();
100 self.collect_headings(ast, &mut headings);
101 headings
102 }
103
104 pub fn headings_with_context<'a>(&self, ast: &'a AstNode<'a>) -> Result<Vec<&'a AstNode<'a>>> {
106 let headings = self.headings(ast);
107 Ok(headings)
108 }
109
110 pub fn code_blocks<'a>(&self, ast: &'a AstNode<'a>) -> Vec<&'a AstNode<'a>> {
112 let mut code_blocks = Vec::new();
113 self.collect_code_blocks(ast, &mut code_blocks);
114 code_blocks
115 }
116
117 pub fn code_blocks_with_context<'a>(
119 &self,
120 ast: &'a AstNode<'a>,
121 ) -> Result<Vec<&'a AstNode<'a>>> {
122 let code_blocks = self.code_blocks(ast);
123 Ok(code_blocks)
124 }
125
126 #[allow(clippy::only_used_in_recursion)]
128 fn collect_headings<'a>(&self, node: &'a AstNode<'a>, result: &mut Vec<&'a AstNode<'a>>) {
129 if let NodeValue::Heading(..) = &node.data.borrow().value {
130 result.push(node)
131 }
132
133 for child in node.children() {
135 self.collect_headings(child, result);
136 }
137 }
138
139 #[allow(clippy::only_used_in_recursion)]
141 fn collect_code_blocks<'a>(&self, node: &'a AstNode<'a>, result: &mut Vec<&'a AstNode<'a>>) {
142 if let NodeValue::CodeBlock(..) = &node.data.borrow().value {
143 result.push(node)
144 }
145
146 for child in node.children() {
148 self.collect_code_blocks(child, result);
149 }
150 }
151
152 pub fn heading_level<'a>(node: &'a AstNode<'a>) -> Option<u32> {
154 match &node.data.borrow().value {
155 NodeValue::Heading(heading) => Some(heading.level.into()),
156 _ => None,
157 }
158 }
159
160 pub fn node_text<'a>(&self, node: &'a AstNode<'a>) -> String {
162 let mut text = String::new();
163 self.collect_text(node, &mut text);
164 text
165 }
166
167 #[allow(clippy::only_used_in_recursion)]
169 fn collect_text<'a>(&self, node: &'a AstNode<'a>, text: &mut String) {
170 match &node.data.borrow().value {
171 NodeValue::Text(t) => text.push_str(t),
172 NodeValue::Code(code) => text.push_str(&code.literal),
173 _ => {
174 for child in node.children() {
175 self.collect_text(child, text);
176 }
177 }
178 }
179 }
180
181 pub fn node_position<'a>(&self, node: &'a AstNode<'a>) -> Option<(usize, usize)> {
183 let sourcepos = node.data.borrow().sourcepos;
184 if sourcepos.start.line > 0 {
185 Some((sourcepos.start.line, sourcepos.start.column))
186 } else {
187 None
188 }
189 }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use comrak::Arena;
    use std::path::PathBuf;

    /// Builds a `Document` for a test, panicking if construction fails.
    fn make_doc(text: impl Into<String>, file_name: &str) -> Document {
        Document::new(text.into(), PathBuf::from(file_name)).expect("Failed to create document")
    }

    /// Content is split into lines, blank lines included.
    #[test]
    fn test_document_creation() {
        let doc = make_doc("# Test\n\nThis is a test.", "test.md");

        assert_eq!(doc.lines.len(), 3);
        assert_eq!(doc.lines, ["# Test", "", "This is a test."]);
    }

    /// An empty string is a valid document with no lines.
    #[test]
    fn test_empty_document_allowed() {
        let outcome = Document::new(String::new(), PathBuf::from("empty.md"));
        assert!(outcome.is_ok());

        let doc = outcome.unwrap();
        assert_eq!(doc.content, "");
        assert_eq!(doc.lines.len(), 0);
        assert_eq!(doc.path, PathBuf::from("empty.md"));
    }

    /// Whitespace-only content is accepted and keeps its lines.
    #[test]
    fn test_whitespace_only_document_allowed() {
        let outcome = Document::new(String::from(" \n \n "), PathBuf::from("whitespace.md"));
        assert!(outcome.is_ok());

        let doc = outcome.unwrap();
        assert_eq!(doc.content, " \n \n ");
        assert_eq!(doc.lines.len(), 3);
        assert_eq!(doc.path, PathBuf::from("whitespace.md"));
    }

    /// The first byte of each line maps to that line's 1-based number.
    #[test]
    fn test_line_number_calculation() {
        let doc = make_doc("Line 1\nLine 2\nLine 3", "test.md");

        for (offset, expected_line) in [(0, 1), (7, 2), (14, 3)] {
            assert_eq!(doc.line_number_at_offset(offset), expected_line);
        }
    }

    /// Headings are found in order and report their levels.
    #[test]
    fn test_heading_extraction() {
        let doc = make_doc("# H1\n## H2\n### H3\nText", "test.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        let found = doc.headings(root);
        assert_eq!(found.len(), 3);

        let levels: Vec<Option<u32>> = found.into_iter().map(Document::heading_level).collect();
        assert_eq!(levels, vec![Some(1), Some(2), Some(3)]);
    }

    /// Non-ASCII content is stored untouched and split into lines.
    #[test]
    fn test_document_with_unicode() {
        let text = "# 标题\n\n这是一个测试。\n\n```rust\nfn main() {\n println!(\"Hello, 世界!\");\n}\n```";
        let doc = make_doc(text, "unicode.md");

        for needle in ["标题", "世界"] {
            assert!(doc.content.contains(needle));
        }
        assert_eq!(doc.lines.len(), 9);
    }

    /// A very long line is kept as a single line.
    #[test]
    fn test_document_with_very_long_lines() {
        let long_line = "a".repeat(10000);
        let doc = make_doc(format!("# Test\n\n{long_line}\n\nEnd"), "long.md");

        assert_eq!(doc.lines.len(), 5);
        assert_eq!(doc.lines[2].len(), 10000);
    }

    /// `\r\n` and `\n` terminators are both stripped from the lines.
    #[test]
    fn test_document_with_mixed_line_endings() {
        let doc = make_doc("Line 1\r\nLine 2\nLine 3\r\nLine 4", "mixed.md");

        assert_eq!(doc.lines.len(), 4);
        assert_eq!(doc.lines, ["Line 1", "Line 2", "Line 3", "Line 4"]);
    }

    /// Four newlines produce four empty lines.
    #[test]
    fn test_document_with_only_newlines() {
        let doc = make_doc("\n\n\n\n", "newlines.md");

        assert_eq!(doc.lines.len(), 4);
        assert!(doc.lines.iter().all(|line| line == ""));
    }

    /// The root node reports a position starting at 1 or later.
    #[test]
    fn test_node_position_edge_cases() {
        let doc = make_doc("# Test\n\n```rust\ncode\n```\n\n- Item", "test.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        let position = doc.node_position(root);
        assert!(position.is_some());

        let (line, col) = position.unwrap();
        assert!(line >= 1);
        assert!(col >= 1);
    }

    /// Fenced and indented code blocks are collected.
    #[test]
    fn test_code_blocks_extraction() {
        let markdown = r#"# Test

```rust
fn main() {}
```

Some text.

```bash
echo "hello"
```

    // Indented code block
    let x = 5;

```
No language
```
"#;
        let doc = make_doc(markdown, "test.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        let found = doc.code_blocks(root);
        assert!(found.len() >= 3);
    }

    /// A document containing several link styles still parses to an AST
    /// with a heading.
    #[test]
    fn test_links_extraction() {
        let markdown = r#"# Test

[Link 1](http://example.com)

[Link 2](./relative.md)

<https://autolink.com>

[Reference link][ref]

[ref]: http://reference.com
"#;
        let doc = make_doc(markdown, "test.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        assert!(!doc.content.is_empty());
        assert!(!doc.lines.is_empty());

        let has_heading = root.descendants().any(|node| {
            matches!(
                node.data.borrow().value,
                comrak::nodes::NodeValue::Heading(_)
            )
        });
        assert!(has_heading, "Expected to find heading in parsed AST");
    }

    /// Out-of-range offsets still give a line number; in-range ones are exact.
    #[test]
    fn test_line_number_at_offset_edge_cases() {
        let doc = make_doc("a\nb\nc", "test.md");

        let beyond_end = doc.line_number_at_offset(100);
        assert!(beyond_end >= 1);

        for (offset, expected_line) in [(0, 1), (2, 2), (4, 3)] {
            assert_eq!(doc.line_number_at_offset(offset), expected_line);
        }
    }

    /// Content using the enabled extensions parses into a non-trivial AST.
    #[test]
    fn test_ast_parsing_with_extensions() {
        let markdown = r#"# Test

| Table | Header |
|-------|--------|
| Cell | Data |

~~Strikethrough~~

- [x] Task done
- [ ] Task pending

^Super^script

[^footnote]: Footnote content

Front matter:
---
title: Test
---
"#;
        let doc = make_doc(markdown, "test.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        for needle in [
            "~~Strikethrough~~",
            "| Table | Header |",
            "- [x] Task done",
            "title: Test",
        ] {
            assert!(doc.content.contains(needle));
        }

        let node_count = root.descendants().count();
        assert!(
            node_count > 5,
            "Expected AST to contain multiple nodes, got {node_count}"
        );
    }

    /// An empty path is stored as given.
    #[test]
    fn test_empty_path() {
        let doc = Document::new(String::from("# Test"), PathBuf::new())
            .expect("Failed to create document");

        assert_eq!(doc.path, PathBuf::new());
    }

    /// Headings and code blocks are found inside a document mixing
    /// blockquotes, nested lists and fenced code.
    #[test]
    fn test_complex_nested_structure() {
        let markdown = r#"# Main Title

## Section 1

### Subsection

Some text with **bold** and *italic*.

> Blockquote with `code` inside.
>
> > Nested blockquote.

1. Ordered list
   - Nested unordered
   - Another item
2. Second ordered item

```rust
// Code with comments
fn complex_function() {
    println!("Complex: {}", "test");
}
```

Final paragraph.
"#;
        let doc = make_doc(markdown, "complex.md");
        let arena = Arena::new();
        let root = doc.parse_ast(&arena);

        assert!(doc.headings(root).len() >= 3);
        assert!(!doc.code_blocks(root).is_empty());

        for needle in [
            "# Main Title",
            "## Section 1",
            "### Subsection",
            "```rust",
            "Final paragraph",
        ] {
            assert!(doc.content.contains(needle));
        }

        assert!(
            doc.lines.len() > 10,
            "Expected multiple lines in complex document"
        );
    }
}