use std::io::BufRead;
use regex::Regex;
use crate::config::Config;
use crate::errors::FormatterError;
use crate::structures::{Block, Document};
/// Line-oriented parser that turns text input into a `Document` made of `Block`s.
///
/// The regular expressions are compiled once in `Parser::new` and reused for
/// every line handled by `Parser::parse`.
pub struct Parser {
/// Switches read while parsing: `remove_extra_spaces`, `detect_headings`
/// and `detect_lists`.
config: Config,
/// Matches a run of two or more whitespace characters.
re_extra_spaces: Regex,
/// Matches a line starting with one or more `#`; captures the hashes and the rest.
re_heading: Regex,
/// Matches a line starting with `-`, `*` or `+` followed by whitespace; captures the item text.
re_bullet: Regex,
}
impl Parser {
    /// Creates a parser that applies `config` on every [`Parser::parse`] call.
    ///
    /// # Panics
    ///
    /// Only if one of the built-in pattern literals fails to compile, which
    /// would be a programming error rather than a runtime condition.
    pub fn new(config: Config) -> Self {
        Parser {
            config,
            re_extra_spaces: Regex::new(r"\s{2,}")
                .expect("built-in whitespace-run pattern must compile"),
            re_heading: Regex::new(r"^(#+)\s*(.*)$")
                .expect("built-in heading pattern must compile"),
            re_bullet: Regex::new(r"^[-*+]\s+(.*)$")
                .expect("built-in bullet pattern must compile"),
        }
    }

    /// Reads `reader` line by line and groups the lines into document blocks.
    ///
    /// Every line has its trailing whitespace removed first. After that:
    ///
    /// * A line that is exactly "```" (ignoring surrounding whitespace) opens
    ///   or closes a code block. Lines inside a code block are stored as-is
    ///   and joined with `\n`; none of the rules below apply to them.
    /// * With `config.remove_extra_spaces`, runs of two or more whitespace
    ///   characters are collapsed into a single space.
    /// * With `config.detect_headings`, a line starting with `#` becomes a
    ///   heading whose level is the number of leading `#` characters
    ///   (saturating at `u8::MAX`) and whose text is the trimmed remainder.
    /// * With `config.detect_lists`, consecutive lines starting with `-`, `*`
    ///   or `+` followed by whitespace are collected into one list; any other
    ///   line ends the list.
    /// * A blank line ends the current paragraph; any other line is appended
    ///   to it, separated from the previous line by a single space.
    ///
    /// A code block still open at the end of input is emitted as the final block.
    ///
    /// # Errors
    ///
    /// Returns an error as soon as reading a line from `reader` fails.
    pub fn parse<R: BufRead>(&self, reader: R) -> Result<Document, FormatterError> {
        let mut doc = Document::new();
        let mut paragraph_buffer = String::new();
        let mut list_buffer: Vec<String> = Vec::new();
        let mut code_block_buffer: Vec<String> = Vec::new();
        let mut in_code_block = false;

        for line_result in reader.lines() {
            let mut line = line_result?;
            // Strip trailing whitespace in place instead of allocating a new String.
            let kept = line.trim_end().len();
            line.truncate(kept);

            // NOTE(review): only a bare fence is recognised; an opening fence that
            // carries an info string (e.g. "```rust") falls through to the rules
            // below. Confirm whether that is intended.
            if line.trim() == "```" {
                if in_code_block {
                    in_code_block = false;
                    self.push_code_block(&mut doc, &mut code_block_buffer);
                } else {
                    in_code_block = true;
                    // A fence terminates whatever block was being accumulated.
                    self.flush_paragraph(&mut doc, &mut paragraph_buffer);
                    self.flush_list(&mut doc, &mut list_buffer);
                }
                continue;
            }

            if in_code_block {
                code_block_buffer.push(line);
                continue;
            }

            if self.config.remove_extra_spaces {
                line = self.re_extra_spaces.replace_all(&line, " ").into_owned();
            }

            if self.config.detect_headings {
                if let Some(caps) = self.re_heading.captures(&line) {
                    self.flush_paragraph(&mut doc, &mut paragraph_buffer);
                    self.flush_list(&mut doc, &mut list_buffer);
                    // Clamp before narrowing: a bare `as u8` would wrap, turning a
                    // run of 256 `#` characters into level 0.
                    let level = caps[1].len().min(usize::from(u8::MAX)) as u8;
                    let text = caps[2].trim().to_string();
                    doc.blocks.push(Block::Heading(level, text));
                    continue;
                }
            }

            if self.config.detect_lists {
                if let Some(caps) = self.re_bullet.captures(&line) {
                    self.flush_paragraph(&mut doc, &mut paragraph_buffer);
                    list_buffer.push(caps[1].to_string());
                    continue;
                }
                // Any non-bullet line ends the list in progress.
                self.flush_list(&mut doc, &mut list_buffer);
            }

            if line.trim().is_empty() {
                self.flush_paragraph(&mut doc, &mut paragraph_buffer);
            } else {
                if !paragraph_buffer.is_empty() {
                    paragraph_buffer.push(' ');
                }
                paragraph_buffer.push_str(&line);
            }
        }

        self.flush_paragraph(&mut doc, &mut paragraph_buffer);
        self.flush_list(&mut doc, &mut list_buffer);
        if in_code_block {
            // Unterminated fence: keep what was collected rather than dropping it.
            self.push_code_block(&mut doc, &mut code_block_buffer);
        }
        Ok(doc)
    }

    /// Appends the collected code lines to `doc` as one code block (even when
    /// no lines were collected) and empties `lines`.
    fn push_code_block(&self, doc: &mut Document, lines: &mut Vec<String>) {
        doc.blocks.push(Block::CodeBlock(lines.join("\n")));
        lines.clear();
    }

    /// Appends the trimmed contents of `buffer` to `doc` as a paragraph, unless
    /// it is empty or whitespace-only, and always leaves `buffer` empty.
    fn flush_paragraph(&self, doc: &mut Document, buffer: &mut String) {
        let text = buffer.trim();
        if !text.is_empty() {
            doc.blocks.push(Block::Paragraph(text.to_string()));
        }
        buffer.clear();
    }

    /// Appends the collected items to `doc` as a list, unless there are none,
    /// and always leaves `list_buffer` empty.
    fn flush_list(&self, doc: &mut Document, list_buffer: &mut Vec<String>) {
        if !list_buffer.is_empty() {
            // Move the items out instead of cloning and then clearing them.
            doc.blocks.push(Block::List(std::mem::take(list_buffer)));
        }
    }
}