use crate::error::{MdBookLintError, Result};
use comrak::nodes::{AstNode, NodeValue};
use comrak::{Arena, ComrakOptions, parse_document};
use std::path::PathBuf;
/// An in-memory text document together with the path it is associated with.
///
/// The raw text is kept alongside a pre-split list of its lines so that
/// line-oriented lookups do not need to re-split the content.
#[derive(Debug)]
pub struct Document {
/// The full, unmodified text of the document.
pub content: String,
/// The path associated with this document.
pub path: PathBuf,
/// The lines of `content`; `Document::new` fills this from `str::lines`,
/// so the entries do not include their line terminators.
pub lines: Vec<String>,
}
impl Document {
pub fn new(content: String, path: PathBuf) -> Result<Self> {
let lines: Vec<String> = content.lines().map(|s| s.to_owned()).collect();
Ok(Document {
content,
path,
lines,
})
}
/// Parses the document content into a comrak AST allocated in `arena`.
///
/// Enabled extensions: strikethrough, tables, autolinks, task lists,
/// footnotes, description lists, and `---`-delimited front matter.
/// Tag filtering, superscript, and header IDs are switched off, as are the
/// smart-punctuation and relaxed-matching parse options.
///
/// Returns the root node of the parsed tree.
pub fn parse_ast<'a>(&self, arena: &'a Arena<AstNode<'a>>) -> &'a AstNode<'a> {
    let mut options = ComrakOptions::default();

    // Syntax extensions that are turned on.
    let extension = &mut options.extension;
    extension.strikethrough = true;
    extension.table = true;
    extension.autolink = true;
    extension.tasklist = true;
    extension.footnotes = true;
    extension.description_lists = true;
    extension.front_matter_delimiter = Some("---".to_owned());

    // Syntax extensions that are explicitly turned off.
    extension.tagfilter = false;
    extension.superscript = false;
    extension.header_ids = None;

    // Parser behaviour: no smart punctuation, no relaxed matching.
    let parse = &mut options.parse;
    parse.smart = false;
    parse.default_info_string = None;
    parse.relaxed_tasklist_matching = false;
    parse.relaxed_autolinks = false;

    parse_document(arena, &self.content, &options)
}
/// Parses the document like [`Document::parse_ast`], but reports an error
/// when the result looks empty for non-empty input.
///
/// # Errors
///
/// Returns `MdBookLintError::Document` if the parsed root has no children
/// even though the content contains something other than whitespace.
pub fn parse_ast_with_context<'a>(
    &self,
    arena: &'a Arena<AstNode<'a>>,
) -> Result<&'a AstNode<'a>> {
    let root = self.parse_ast(arena);

    let root_is_empty = root.children().next().is_none();
    let has_visible_content = !self.content.trim().is_empty();
    if root_is_empty && has_visible_content {
        return Err(MdBookLintError::Document(
            "Failed to parse document AST - no content nodes found".to_string(),
        ));
    }

    Ok(root)
}
/// Returns the 1-based line number that contains byte `offset` of `content`.
///
/// Line boundaries are read from `content` itself, so both `\n` and `\r\n`
/// terminators are counted with their real byte length. An offset that points
/// at a line terminator is attributed to the line that terminator ends, and
/// an offset equal to the content length is attributed to the final
/// unterminated line, if there is one.
///
/// If `offset` lies beyond every line, `self.lines.len()` is returned (which
/// is `0` for an empty document).
pub fn line_number_at_offset(&self, offset: usize) -> usize {
    let mut line_start = 0;
    for (line_idx, raw_line) in self.content.split_inclusive('\n').enumerate() {
        // `raw_line` still carries its terminator, so its length is the true
        // distance to the next line start, whatever the line-ending style.
        let next_start = line_start + raw_line.len();
        // Last offset that belongs to this line: the `\n` byte when the line
        // is terminated, otherwise the end-of-content position.
        let last_offset = if raw_line.ends_with('\n') {
            next_start - 1
        } else {
            next_start
        };
        if offset <= last_offset {
            return line_idx + 1;
        }
        line_start = next_start;
    }
    self.lines.len()
}
/// Returns the 1-based, byte-oriented column of byte `offset` within its line.
///
/// Line boundaries are read from `content` itself, so both `\n` and `\r\n`
/// terminators are counted with their real byte length. An offset that points
/// at a line terminator is reported as a column on the line it ends.
///
/// If `offset` lies beyond every line, column `1` is returned.
pub fn column_number_at_offset(&self, offset: usize) -> usize {
    let mut line_start = 0;
    for raw_line in self.content.split_inclusive('\n') {
        // `raw_line` still carries its terminator, so its length is the true
        // distance to the next line start, whatever the line-ending style.
        let next_start = line_start + raw_line.len();
        // Last offset that belongs to this line: the `\n` byte when the line
        // is terminated, otherwise the end-of-content position.
        let last_offset = if raw_line.ends_with('\n') {
            next_start - 1
        } else {
            next_start
        };
        if offset <= last_offset {
            // Earlier lines were skipped only because `offset` was past
            // them, so `offset >= line_start` and this cannot underflow.
            return offset - line_start + 1;
        }
        line_start = next_start;
    }
    1
}
/// Collects every heading node reachable from `ast`, in document order.
pub fn headings<'a>(&self, ast: &'a AstNode<'a>) -> Vec<&'a AstNode<'a>> {
    let mut found = Vec::new();
    self.collect_headings(ast, &mut found);
    found
}
/// `Result`-returning variant of [`Document::headings`].
///
/// Currently always succeeds and yields the same nodes as `headings`.
pub fn headings_with_context<'a>(&self, ast: &'a AstNode<'a>) -> Result<Vec<&'a AstNode<'a>>> {
    Ok(self.headings(ast))
}
/// Collects every code-block node reachable from `ast`, in document order.
pub fn code_blocks<'a>(&self, ast: &'a AstNode<'a>) -> Vec<&'a AstNode<'a>> {
    let mut found = Vec::new();
    self.collect_code_blocks(ast, &mut found);
    found
}
/// `Result`-returning variant of [`Document::code_blocks`].
///
/// Currently always succeeds and yields the same nodes as `code_blocks`.
pub fn code_blocks_with_context<'a>(
    &self,
    ast: &'a AstNode<'a>,
) -> Result<Vec<&'a AstNode<'a>>> {
    Ok(self.code_blocks(ast))
}
/// Appends `node` to `result` if it is a heading, then visits its children.
///
/// The traversal is pre-order, so headings end up in document order.
// `self` is only needed to make the recursive call; keep the method form.
#[allow(clippy::only_used_in_recursion)]
fn collect_headings<'a>(&self, node: &'a AstNode<'a>, result: &mut Vec<&'a AstNode<'a>>) {
    let is_heading = matches!(&node.data.borrow().value, NodeValue::Heading(..));
    if is_heading {
        result.push(node);
    }
    node.children()
        .for_each(|child| self.collect_headings(child, result));
}
/// Appends `node` to `result` if it is a code block, then visits its children.
///
/// The traversal is pre-order, so code blocks end up in document order.
// `self` is only needed to make the recursive call; keep the method form.
#[allow(clippy::only_used_in_recursion)]
fn collect_code_blocks<'a>(&self, node: &'a AstNode<'a>, result: &mut Vec<&'a AstNode<'a>>) {
    let is_code_block = matches!(&node.data.borrow().value, NodeValue::CodeBlock(..));
    if is_code_block {
        result.push(node);
    }
    node.children()
        .for_each(|child| self.collect_code_blocks(child, result));
}
/// Returns the level of `node` when it is a heading, or `None` for any
/// other kind of node.
pub fn heading_level<'a>(node: &'a AstNode<'a>) -> Option<u32> {
    if let NodeValue::Heading(heading) = &node.data.borrow().value {
        Some(u32::from(heading.level))
    } else {
        None
    }
}
/// Returns the concatenated text found in `node` and its descendants.
///
/// Only plain text nodes and inline code literals contribute to the result.
pub fn node_text<'a>(&self, node: &'a AstNode<'a>) -> String {
    let mut buffer = String::new();
    self.collect_text(node, &mut buffer);
    buffer
}
/// Appends the text carried by `node` to `text`.
///
/// Text nodes and inline code contribute their literal content directly;
/// every other node contributes whatever its children contribute.
// `self` is only needed to make the recursive call; keep the method form.
#[allow(clippy::only_used_in_recursion)]
fn collect_text<'a>(&self, node: &'a AstNode<'a>, text: &mut String) {
    let data = node.data.borrow();
    match &data.value {
        NodeValue::Text(literal) => text.push_str(literal),
        NodeValue::Code(inline) => text.push_str(&inline.literal),
        _ => node
            .children()
            .for_each(|child| self.collect_text(child, text)),
    }
}
/// Returns the `(line, column)` at which `node` starts in the source.
///
/// Yields `None` when the recorded start line is `0`.
pub fn node_position<'a>(&self, node: &'a AstNode<'a>) -> Option<(usize, usize)> {
    let start = node.data.borrow().sourcepos.start;
    match start.line {
        0 => None,
        line => Some((line, start.column)),
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use comrak::Arena;
    use std::path::PathBuf;

    /// Content is split into lines without their terminators.
    #[test]
    fn test_document_creation() {
        let doc = Document::new(
            String::from("# Test\n\nThis is a test."),
            PathBuf::from("test.md"),
        )
        .expect("Failed to create document");

        assert_eq!(doc.lines.len(), 3);
        assert_eq!(doc.lines[0], "# Test");
        assert_eq!(doc.lines[1], "");
        assert_eq!(doc.lines[2], "This is a test.");
    }

    /// An empty string is a valid document with zero lines.
    #[test]
    fn test_empty_document_allowed() {
        let result = Document::new(String::new(), PathBuf::from("empty.md"));
        assert!(result.is_ok());

        let document = result.unwrap();
        assert_eq!(document.content, "");
        assert_eq!(document.lines.len(), 0);
        assert_eq!(document.path, PathBuf::from("empty.md"));
    }

    /// Whitespace-only content is accepted and keeps its line structure.
    #[test]
    fn test_whitespace_only_document_allowed() {
        let result = Document::new(String::from(" \n \n "), PathBuf::from("whitespace.md"));
        assert!(result.is_ok());

        let document = result.unwrap();
        assert_eq!(document.content, " \n \n ");
        assert_eq!(document.lines.len(), 3);
        assert_eq!(document.path, PathBuf::from("whitespace.md"));
    }

    /// Offsets at the start of each line map to that line's number.
    #[test]
    fn test_line_number_calculation() {
        let doc = Document::new(
            String::from("Line 1\nLine 2\nLine 3"),
            PathBuf::from("test.md"),
        )
        .expect("Failed to create document");

        assert_eq!(doc.line_number_at_offset(0), 1);
        assert_eq!(doc.line_number_at_offset(7), 2);
        assert_eq!(doc.line_number_at_offset(14), 3);
    }

    /// Headings are found in order and report their levels.
    #[test]
    fn test_heading_extraction() {
        let doc = Document::new(
            String::from("# H1\n## H2\n### H3\nText"),
            PathBuf::from("test.md"),
        )
        .expect("Failed to create document");

        let arena = Arena::new();
        let root = doc.parse_ast(&arena);
        let headings = doc.headings(root);

        assert_eq!(headings.len(), 3);
        assert_eq!(Document::heading_level(headings[0]), Some(1));
        assert_eq!(Document::heading_level(headings[1]), Some(2));
        assert_eq!(Document::heading_level(headings[2]), Some(3));
    }
}