use crate::mark::{
AlignHorizontal, Heading, IndentLevel, Listing, Mark, SeparatorDir, StyleImage, StyleText,
};
use std::collections::HashMap;
/// Line terminator the parser searches for; selected at compile time for Windows targets.
#[cfg(windows)]
const LINE_ENDING: &str = "\r\n";
/// Line terminator the parser searches for; selected at compile time for non-Windows targets.
#[cfg(not(windows))]
const LINE_ENDING: &str = "\n";
/// Incremental parser that turns markup text into a stream of `Mark` values.
///
/// The parser owns the not-yet-consumed input and is driven through its
/// `Iterator` implementation; every produced mark removes its source text
/// from the front of `s`.
#[derive(Debug, Default)]
pub struct Parser {
/// Remaining, not yet consumed input.
s: String,
/// Set once the initial `Mark::Page` has been emitted by the iterator.
first_page_return: bool,
/// Reset to 0 whenever a line ending is consumed; not read elsewhere in this file.
indent_level: u8,
/// Last number handed out to an ordered-list item, keyed by indent level.
indent_orderer_number_map: HashMap<IndentLevel, u8>,
/// True while the cursor sits at the beginning of a line, enabling line-level constructs.
is_line_start: bool,
/// Set when an ordered-list item was parsed; cleared when a blank line is consumed.
is_ordered: bool,
/// Set when an unordered-list item was parsed; cleared when a blank line is consumed.
is_unordered: bool,
/// Inline style toggles (bold, italics, ...) applied to plain text; reset at every line ending.
style_text: StyleText,
/// Order assigned to the next transition without an explicit number; 0 means the
/// page's initial `Mark::Transition(0, ..)` still has to be emitted.
transition_order: usize,
/// Indent level (as integer) of the most recently parsed ordered-list item.
ordered_list_current_indent_level_int: u8,
}
impl Parser {
pub fn new(s: String) -> Self {
let mut indent_orderer_number_map = HashMap::new();
indent_orderer_number_map.insert(IndentLevel::None, 0);
indent_orderer_number_map.insert(IndentLevel::I1, 0);
indent_orderer_number_map.insert(IndentLevel::I2, 0);
indent_orderer_number_map.insert(IndentLevel::I3, 0);
indent_orderer_number_map.insert(IndentLevel::I4, 0);
indent_orderer_number_map.insert(IndentLevel::I5, 0);
Self {
s,
indent_orderer_number_map,
is_line_start: true,
..Default::default()
}
}
/// Drains the parser and groups its marks into pages.
///
/// Each returned tuple holds a `Mark::Page` containing that page's
/// `Mark::Transition` entries, the highest transition order seen on the
/// page, and a third value that this function always leaves at 0.
///
/// Marks that are neither pages nor transitions are appended to the most
/// recent transition of the most recent page; a page and/or a
/// `Mark::Transition(0, ..)` is created on demand when none exists yet.
pub fn into_pages(iter: Self) -> Vec<(Mark, usize, usize)> {
    let mut pages: Vec<(Mark, usize, usize)> = Vec::new();
    for mark in iter {
        if let Mark::Page(..) = mark {
            pages.push((mark, 0, 0));
            continue;
        }
        if pages.is_empty() {
            pages.push((Mark::Page(vec![]), 0, 0));
        }
        let (transitions, max_transition_idx) = match pages.last_mut() {
            Some((Mark::Page(transitions), max_transition_idx, _)) => {
                (transitions, max_transition_idx)
            }
            // Only `Mark::Page` entries are ever pushed, so this is not expected to happen.
            _ => continue,
        };
        match mark {
            Mark::Transition(order, _) => {
                if order > *max_transition_idx {
                    *max_transition_idx = order;
                }
                transitions.push(mark);
            }
            // An explicit transition end opens a fresh default transition for what follows.
            Mark::TransitionEnd => transitions.push(Mark::Transition(0, vec![])),
            _ => {
                if transitions.is_empty() {
                    transitions.push(Mark::Transition(0, vec![]));
                }
                // Look up the last transition *after* the push above; using a length
                // captured earlier would underflow on a page without transitions.
                if let Some(Mark::Transition(_, marks)) = transitions.last_mut() {
                    marks.push(mark);
                }
            }
        }
    }
    pages
}
/// Parses an inline code span delimited by single backticks on the current line.
///
/// On success the span (both backticks included) is consumed and a
/// `Mark::Text` with the code style is returned. Returns `None`, leaving
/// the input untouched, when the input does not start with a backtick or
/// the closing backtick is missing on this line.
fn code(&mut self) -> Option<Mark> {
    let after_open = self.s.strip_prefix('`')?;
    let line = &after_open[..after_open.find(LINE_ENDING).unwrap_or(after_open.len())];
    let close = line.find('`')?;
    let text = line[..close].to_owned();
    // Opening backtick + text + closing backtick. Consuming exactly this much keeps the
    // character after the span and cannot run past the end of the input.
    let consumed = close + 2;
    self.s = self.s[consumed..].to_owned();
    // Same as the other inline parsers: what follows is no longer at the start of a line.
    self.is_line_start = false;
    Some(Mark::Text(text, StyleText::new().with_code()))
}
/// Parses a fenced code block that opens with a triple-backtick fence at the cursor.
///
/// The text after the opening fence on the first line is the optional
/// language. The block ends at the first line that starts with a
/// triple-backtick fence. On success everything up to and including that
/// closing fence is consumed and `Mark::CodeBlock(code, language)` is
/// returned. Returns `None` when there is no opening or no closing fence.
fn code_block(&mut self) -> Option<Mark> {
    if !self.s.starts_with("```") {
        return None;
    }
    let closing_fence = format!("{}```", LINE_ENDING);
    let cb_end = self.s.find(&closing_fence)?;
    // A closing fence was found, so a line ending exists at or before `cb_end`.
    let first_line_end = self.s.find(LINE_ENDING).unwrap_or(self.s.len());
    let first_line = &self.s[3..first_line_end];
    let language = if first_line.is_empty() {
        None
    } else {
        Some(first_line.to_owned())
    };
    // Skip the whole line ending (two bytes on Windows). When the closing fence directly
    // follows the opening line the range is inverted, which means an empty block.
    let code_start = first_line_end + LINE_ENDING.len();
    let code = self.s.get(code_start..cb_end).unwrap_or("").to_owned();
    self.s = self.s[cb_end + closing_fence.len()..].to_owned();
    Some(Mark::CodeBlock(code, language))
}
/// Parses a heading line: one or more `#` characters, a space, then the heading text.
///
/// On success the line (without its line ending) is consumed and a
/// `Mark::Text` styled with the heading level is returned. Lines of two
/// bytes or fewer, or without a space after the `#` run, yield `None`.
fn heading(&mut self) -> Option<Mark> {
    if !self.s.starts_with('#') {
        return None;
    }
    let line_end = self.s.find(LINE_ENDING).unwrap_or(self.s.len());
    let line = &self.s[..line_end];
    if line.len() <= 2 {
        return None;
    }
    // The first `#` is already known; count further ones, never looking at the last byte.
    let mut heading_level = 1;
    let mut marker_len = 1;
    for byte in line.bytes().skip(1).take(line.len() - 2) {
        if byte != b'#' {
            break;
        }
        heading_level += 1;
        marker_len += 1;
    }
    let text = line[marker_len..].strip_prefix(' ')?.to_owned();
    let style = StyleText::new().with_heading(Heading::from(heading_level));
    self.s = self.s[line_end..].to_owned();
    self.is_line_start = false;
    Some(Mark::Text(text, style))
}
/// Parses a hyperlink at the cursor, in either `<url>` or `[title](url)` form.
///
/// Both forms must be completed on the current line. On success the link
/// is consumed and a `Mark::Text` carrying a hyperlink style is returned;
/// for the angle-bracket form the URL doubles as the displayed text.
fn hyperlink(&mut self) -> Option<Mark> {
    let line = &self.s[..self.s.find(LINE_ENDING).unwrap_or(self.s.len())];
    // (displayed text, url, number of bytes to consume)
    let parsed = match self.s.chars().next() {
        Some('<') => line.find('>').map(|close| {
            let url = &line[1..close];
            (url.to_owned(), url.to_owned(), close + 1)
        }),
        Some('[') => line.find(']').and_then(|bracket| {
            let target = line[bracket + 1..].strip_prefix('(')?;
            let target_len = target.find(')')?;
            Some((
                line[1..bracket].to_owned(),
                target[..target_len].to_owned(),
                // "](" takes two bytes after `bracket`, ")" one more after the target.
                bracket + 2 + target_len + 1,
            ))
        }),
        _ => None,
    };
    let (label, url, consumed) = parsed?;
    self.s = self.s[consumed..].to_owned();
    self.is_line_start = false;
    Some(Mark::Text(label, StyleText::new().with_hyperlink(url)))
}
/// Parses an image of the form `![title](url)`, optionally followed by `<options>`.
///
/// Options are separated by `|`. `auto`, `left`, `right` and `center` set
/// the horizontal alignment, `w<number>` and `h<number>` set width and
/// height, and any other option is stored as the image's hyperlink.
/// On success the image (and its option list, if complete) is consumed.
fn image(&mut self) -> Option<Mark> {
    if !self.s.starts_with("![") {
        return None;
    }
    let line = &self.s[..self.s.find(LINE_ENDING).unwrap_or(self.s.len())];
    let bracket = line.find(']')?;
    let target = line[bracket + 1..].strip_prefix('(')?;
    let target_len = target.find(')')?;
    // Byte index of the closing parenthesis within the line.
    let paren = bracket + 2 + target_len;
    let title = line[2..bracket].to_owned();
    let url = target[..target_len].to_owned();
    let mut consumed = paren + 1;
    let mut style = StyleImage::new();
    if let Some(tail) = line[paren + 1..].strip_prefix('<') {
        if let Some(options_len) = tail.find('>') {
            // '<' + options + '>' directly after the closing parenthesis.
            consumed = paren + 1 + 1 + options_len + 1;
            for option in tail[..options_len].split('|') {
                style = match option {
                    "auto" => style.with_align_h(AlignHorizontal::Auto),
                    "left" => style.with_align_h(AlignHorizontal::Left),
                    "right" => style.with_align_h(AlignHorizontal::Right),
                    "center" => style.with_align_h(AlignHorizontal::Center),
                    other => {
                        // A dimension is the given prefix followed by a parsable number.
                        let dimension = |prefix: char| {
                            other
                                .strip_prefix(prefix)
                                .and_then(|value| value.parse::<f32>().ok())
                        };
                        if let Some(width) = dimension('w') {
                            style.with_width(width)
                        } else if let Some(height) = dimension('h') {
                            style.with_height(height)
                        } else {
                            style.with_hyperlink(other.to_owned())
                        }
                    }
                };
            }
        }
    }
    self.s = self.s[consumed..].to_owned();
    self.is_line_start = false;
    Some(Mark::Image(url, title, style))
}
/// Parses an ordered-list item: indentation, optional digits, then `". "` and the text.
///
/// The digits in the source are ignored; the emitted number continues the
/// count of the item's indent level while the list is ongoing at the same
/// or a deeper level, and restarts at 1 otherwise. On success the line
/// (without its line ending) is consumed and a `Mark::Text` with an ordered
/// listing style is returned.
fn ordered_list(&mut self) -> Option<Mark> {
    let line_end = self.s.find(LINE_ENDING).unwrap_or(self.s.len());
    let this_line = &self.s[..line_end];
    if this_line.is_empty() {
        // Nothing to inspect; also keeps `indent` from being handed an empty line.
        return None;
    }
    let indent_level = indent(this_line);
    let level_int = indent_level.to_int();
    let marker_start = (level_int * 2) as usize;
    // Scan the digit run, never looking at the last byte of the line.
    let digits = this_line
        .as_bytes()
        .get(marker_start..this_line.len() - 1)?
        .iter()
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    let text = this_line
        .get(marker_start + digits..)?
        .strip_prefix(". ")?
        .to_owned();
    let continues_list =
        self.is_ordered && self.ordered_list_current_indent_level_int >= level_int;
    let ordered_number = if continues_list {
        // Saturate instead of overflowing the `u8` counter on very long lists.
        self.indent_orderer_number_map
            .get(&indent_level)
            .copied()
            .unwrap_or(0)
            .saturating_add(1)
    } else {
        1
    };
    if let Some(number) = self.indent_orderer_number_map.get_mut(&indent_level) {
        *number = ordered_number;
    }
    self.s = self.s[line_end..].to_owned();
    self.is_line_start = false;
    self.is_ordered = true;
    self.ordered_list_current_indent_level_int = level_int;
    Some(Mark::Text(
        text,
        StyleText::new().with_listing(Listing::Ordered(ordered_number, indent_level)),
    ))
}
/// Sets the ordered-list counter of every indent level back to 0,
/// inserting the entry when it is missing.
fn reset_indent_orderer_number_map(&mut self) {
    let levels = [
        IndentLevel::None,
        IndentLevel::I1,
        IndentLevel::I2,
        IndentLevel::I3,
        IndentLevel::I4,
        IndentLevel::I5,
    ];
    for level in levels {
        self.indent_orderer_number_map.insert(level, 0);
    }
}
/// Parses a quote line, i.e. a line that begins with `"> "`.
///
/// On success the line (without its line ending) is consumed and the text
/// after the marker is returned as a `Mark::Text` with the quote style.
fn quote(&mut self) -> Option<Mark> {
    let rest = self.s.strip_prefix("> ")?;
    let text_len = rest.find(LINE_ENDING).unwrap_or(rest.len());
    let text = rest[..text_len].to_owned();
    // Keep the line ending in the input so the iterator sees the line break.
    self.s = rest[text_len..].to_owned();
    self.is_line_start = false;
    Some(Mark::Text(text, StyleText::new().with_quote()))
}
/// Parses a separator line: `----` (horizontal) or `----v` (vertical),
/// each immediately followed by a line ending.
///
/// On success the marker and its line ending are consumed and the
/// matching `Mark::Separator` is returned.
fn separator(&mut self) -> Option<Mark> {
    let horizontal = ["----", LINE_ENDING].concat();
    let vertical = ["----v", LINE_ENDING].concat();
    let (marker_len, dir) = if self.s.starts_with(horizontal.as_str()) {
        (horizontal.len(), SeparatorDir::Horizontal)
    } else if self.s.starts_with(vertical.as_str()) {
        (vertical.len(), SeparatorDir::Vertical)
    } else {
        return None;
    };
    self.s = self.s[marker_len..].to_owned();
    Some(Mark::Separator(dir))
}
/// Parses a transition line that starts with `---t`, optionally followed by an order number.
///
/// When the rest of the line consists solely of ASCII digits that fit a
/// `usize`, that number is the transition's order; otherwise the parser's
/// running `transition_order` is used. The running order then becomes
/// `order + 1`. The line (without its line ending) is consumed and an
/// empty `Mark::Transition` with the chosen order is returned.
fn transition(&mut self) -> Option<Mark> {
    let rest = self.s.strip_prefix("---t")?;
    let suffix = &rest[..rest.find(LINE_ENDING).unwrap_or(rest.len())];
    // Only a pure digit run counts as an explicit order (a leading '+' must not).
    let explicit_order = if !suffix.is_empty() && suffix.bytes().all(|b| b.is_ascii_digit()) {
        suffix.parse::<usize>().ok()
    } else {
        None
    };
    let consumed = "---t".len() + suffix.len();
    let order = explicit_order.unwrap_or(self.transition_order);
    // Saturate: overflowing would panic in debug builds and wrap to 0 in release builds,
    // and 0 is the value the iterator treats as "initial transition not yet emitted".
    self.transition_order = order.saturating_add(1);
    self.s = self.s[consumed..].to_owned();
    Some(Mark::Transition(order, vec![]))
}
/// Parses an unordered-list item: indentation followed by `"- "` and the item text.
///
/// On success the line (without its line ending) is consumed and the text
/// after the marker is returned as a `Mark::Text` with an unordered
/// listing style for the detected indent level.
fn unordered_list(&mut self) -> Option<Mark> {
    let line_end = self.s.find(LINE_ENDING).unwrap_or(self.s.len());
    let this_line = &self.s[..line_end];
    if this_line.is_empty() {
        // Nothing to inspect; also keeps `indent` from being handed an empty line.
        return None;
    }
    let indent_level = indent(this_line);
    let marker_start = (indent_level.to_int() * 2) as usize;
    // Strip the complete "- " marker so the text carries no leading space, matching how
    // the ordered-list and quote parsers drop their markers.
    let text = this_line
        .get(marker_start..)?
        .strip_prefix("- ")?
        .to_owned();
    self.s = self.s[line_end..].to_owned();
    self.is_line_start = false;
    self.is_unordered = true;
    Some(Mark::Text(
        text,
        StyleText::new().with_listing(Listing::Unordered(indent_level)),
    ))
}
}
impl Iterator for Parser {
type Item = Mark;
/// Produces the next mark, or `None` once the input is exhausted.
///
/// The very first call yields `Mark::Page`, and every page starts with a
/// `Mark::Transition(0, ..)`. After that the input is consumed from the
/// front: a line ending followed by another line ending (or by the end of
/// the input) yields `Mark::NewLine`; at the start of a line the
/// line-level constructs are tried in a fixed order; then inline style
/// toggles, inline code, hyperlinks and backslash escapes; anything else
/// is returned as plain text up to the next special character or line end.
fn next(&mut self) -> Option<Self::Item> {
    if !self.first_page_return {
        self.first_page_return = true;
        return Some(Mark::Page(vec![]));
    }
    if self.transition_order == 0 {
        self.transition_order = 1;
        return Some(Mark::Transition(0, vec![]));
    }
    loop {
        if self.s.is_empty() {
            return None;
        }
        if let Some(rest) = self.s.strip_prefix(LINE_ENDING) {
            self.s = rest.to_owned();
            self.indent_level = 0;
            self.is_line_start = true;
            self.style_text = StyleText::new();
            let is_empty = self.s.is_empty();
            if is_empty || self.s.starts_with(LINE_ENDING) {
                if !is_empty {
                    // Drop the whole second line ending; it is two bytes long on Windows.
                    self.s = self.s[LINE_ENDING.len()..].to_owned();
                }
                // A blank line ends any running list.
                self.is_ordered = false;
                self.is_unordered = false;
                self.ordered_list_current_indent_level_int = 0;
                self.reset_indent_orderer_number_map();
                return Some(Mark::NewLine);
            }
        }
        if self.is_line_start {
            let page_break = format!("---{}", LINE_ENDING);
            if let Some(rest) = self.s.strip_prefix(page_break.as_str()) {
                self.s = rest.to_owned();
                // 0 makes the next call emit the new page's initial transition.
                self.transition_order = 0;
                return Some(Mark::Page(vec![]));
            }
            if let Some(mark) = self.transition() {
                return Some(mark);
            }
            let transition_end = format!("t---{}", LINE_ENDING);
            if let Some(rest) = self.s.strip_prefix(transition_end.as_str()) {
                self.s = rest.to_owned();
                return Some(Mark::TransitionEnd);
            }
            if let Some(mark) = self.code_block() {
                return Some(mark);
            }
            if let Some(mark) = self.heading() {
                return Some(mark);
            }
            if let Some(mark) = self.image() {
                return Some(mark);
            }
            if let Some(mark) = self.ordered_list() {
                return Some(mark);
            }
            if let Some(mark) = self.quote() {
                return Some(mark);
            }
            if let Some(mark) = self.separator() {
                return Some(mark);
            }
            if let Some(mark) = self.unordered_list() {
                return Some(mark);
            }
        }
        // Single-character inline style toggles. They cannot clash with inline code or
        // hyperlinks, which start with other characters, so they are handled together.
        let toggle = match self.s.chars().next() {
            Some('*') => Some(&mut self.style_text.bold),
            Some('/') => Some(&mut self.style_text.italics),
            Some('$') => Some(&mut self.style_text.small),
            Some('~') => Some(&mut self.style_text.strikethrough),
            Some('_') => Some(&mut self.style_text.underline),
            _ => None,
        };
        if let Some(flag) = toggle {
            *flag = !*flag;
            self.s = self.s[1..].to_owned();
            self.is_line_start = false;
            continue;
        }
        if let Some(mark) = self.code() {
            return Some(mark);
        }
        if let Some(mark) = self.hyperlink() {
            return Some(mark);
        }
        if let Some(rest) = self.s.strip_prefix('\\') {
            // Take one whole character after the backslash; slicing a fixed byte range
            // would panic when the escaped character is multi-byte.
            let mut chars = rest.chars();
            if let Some(escaped) = chars.next() {
                let remaining = chars.as_str().to_owned();
                self.s = remaining;
                self.is_line_start = false;
                // NOTE(review): the escaped character is emitted with a fresh style, not
                // the current inline style — kept as in the original; confirm intent.
                return Some(Mark::Text(escaped.to_string(), StyleText::new()));
            }
        }
        // Plain text runs to the next special character or line end; `max(1)` guarantees
        // progress when an unhandled special character sits at the very front.
        let special = self
            .s
            .find(&['*', '`', '~', '_', '/', '$', '^', '\\', '<', '['][..])
            .map_or(self.s.len(), |pos| pos.max(1));
        let line_end = self
            .s
            .find(LINE_ENDING)
            .map_or(self.s.len(), |pos| pos.max(1));
        let end = special.min(line_end);
        let text = Mark::Text(self.s[..end].to_owned(), self.style_text.clone());
        self.s = self.s[end..].to_owned();
        self.is_line_start = false;
        return Some(text);
    }
}
}
/// Determines the indent level of a line from its leading spaces.
///
/// Two leading spaces make one level. The last byte of the line is never
/// counted, so a line consisting only of spaces counts one space fewer.
/// An empty line has no indentation.
fn indent(s: &str) -> IndentLevel {
    let mut indent_level = 0;
    // Single pass over the bytes; `saturating_sub` makes the empty line a no-op
    // instead of underflowing.
    let scanned = s.len().saturating_sub(1);
    for _ in s.bytes().take(scanned).take_while(|&byte| byte == b' ') {
        indent_level += 1;
    }
    indent_level /= 2;
    IndentLevel::from(indent_level)
}