bfom-lib 0.1.52

Brendan's Flavor of Markdown: I'll build my own markdown format, what could go wrong?
Documentation
/// Block-level kind of a single line, as produced by `block_line_type`.
///
/// The per-variant notes describe the test `block_line_type` applies; kinds
/// named in its `excluded` list are never returned.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LineType {
  /// Trimmed line starts with `#`.
  Header,
  /// Fallback for every line that is not classified as another kind.
  Paragraph,
  /// Trimmed line starts with `----` and the line holds only dashes, spaces and tabs.
  HorizontalRule,
  /// Untrimmed line starts with `>` (but not with `>!`).
  BlockQuote,

  /// Trimmed line starts with `<`; the payload says which HTML construct it opens.
  Html(HtmlType),

  /// Trimmed line starts with three backticks.
  Preformatted,
  /// Trimmed line starts with `|`.
  Table,

  /// Unordered list item: indent (number of leading spaces plus 2) and the bullet used.
  UL(usize, ULType),
  /// Ordered list item: indent (offset just past the number, delimiter and
  /// following space) and the item number.
  OL(usize, i32),

  /// Trimmed line starts with `+++`.
  FrontMatter,
}
/// Which HTML construct a `LineType::Html` line opens.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HtmlType {
  /// `<` followed directly by an ASCII letter or digit.
  Normal,
  /// Line (trimmed) starts with `<!--`.
  Comment,
  /// Line (trimmed) starts with `<![CDATA[`.
  CData,
}
/// Bullet character that introduced a `LineType::UL` item.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ULType {
  /// Item written as `* `.
  Star,
  /// Item written as `- `.
  Minus,
  /// Item written as `+ `.
  Plus,
}

pub fn block_line_type(line: &str, excluded: &[String]) -> LineType {
  let trimmed = line.trim();

  if !excluded.contains(&"h".to_string()) && trimmed.starts_with('#') {
    LineType::Header
  }
  // remove dashes and spaces
  // if the result is empty then its a hr
  // if not it will probably become a paragraph
  else if !excluded.contains(&"hr".to_string()) && trimmed.starts_with("----") && line.replace(['-', ' ', '\t'], "") == "" {
    LineType::HorizontalRule
  } else if line.starts_with('>') {
    // spoiler
    if line.starts_with(">!") {
      return LineType::Paragraph;
    }

    if !excluded.contains(&"blockquote".to_string()) {
      LineType::BlockQuote
    } else {
      LineType::Paragraph
    }
  } else if !excluded.contains(&"pre".to_string()) && trimmed.starts_with("```") {
    LineType::Preformatted
  } else if trimmed.starts_with('<') {
    // html sorting here

    // "html"| "html_comment"| "html_cdata"

    // <!--
    if trimmed.starts_with("<!--") && !excluded.contains(&"html_comment".to_string()) {
      return LineType::Html(HtmlType::Comment);
    }

    // <![CDATA[
    if trimmed.starts_with("<![CDATA[") && !excluded.contains(&"html_cdata".to_string()) {
      return LineType::Html(HtmlType::CData);
    }

    let mut char_array = trimmed.chars();
    // skip the first character
    char_array.next();

    // check second character
    if let Some(ch) = char_array.next() {
      match ch {
        '<' => {
          // autolink
          return LineType::Paragraph;
        }
        'a'..='z' | 'A'..='Z' | '0'..='9' => {
          // alphanumeric, can be part of a tag
          // no need to do more, will be caught later on
        }
        _ => {
          // any other space or symbol is not the start of a tag
          return LineType::Paragraph;
        }
      }
    } else {
      // nothing else on the line, definitely not a tag.
      return LineType::Paragraph;
    }

    if !excluded.contains(&"html".to_string()) {
      LineType::Html(HtmlType::Normal)
    } else {
      // paragraph is the catch all if all teh above fail
      LineType::Paragraph
    }
  } else if !excluded.contains(&"table".to_string()) && trimmed.starts_with('|') {
    LineType::Table
  }
  // trim coupled with searching for a space will clear out any blank list start rows
  else if !excluded.contains(&"ul".to_string()) && (trimmed.starts_with("- ") || trimmed.starts_with("+ ") || trimmed.starts_with("* ")) {
    let mut indent = 2;
    for character in line.chars() {
      match character {
        ' ' => {
          indent += 1;
        }
        _ => break,
      }
    }

    let list_type = if trimmed.starts_with("* ") {
      ULType::Star
    } else if trimmed.starts_with("+ ") {
      ULType::Plus
    } else if trimmed.starts_with("- ") {
      ULType::Minus
    } else {
      return LineType::Paragraph;
    };

    LineType::UL(indent, list_type)
  }
  // specifically for https://gitlab.com/silver_rust/bfom/-/issues/2
  else if !excluded.contains(&"fm".to_string()) && trimmed.starts_with("+++") {
    LineType::FrontMatter
  }
  // this covers everything that isn't as easy to find as the above
  else {
    // /((^ {0,8})([0-9]{1,9})(\.{1})( {1})(\w|\d))/gm

    // first check teh first 12 characters for an ordered List
    // diverging from commonmark in this regard to align up to the limit:
    //         1. First Line
    // 123456789. Last Line
    //
    // I can do it as I am not recognising four spaces as a code block

    // up to 8 initial spaces
    // number
    // dot
    // space
    // Non space character

    let characters: Vec<char> = line.chars().collect();
    let mut index = 0;

    // determine initial spaces
    loop {
      if index >= characters.len() {
        break;
      }

      let character = characters[index];

      match character {
        ' ' => {
          // do nothing
        }
        _ => break,
      }
      index += 1;
    }

    /* unlimited indentation allowed
    if index > 8 {
        // not valid
        // too many spaces
        return paragraph;
    }
     */

    // get numbers next
    let mut numbers: Vec<char> = vec![];
    loop {
      if index >= characters.len() {
        break;
      }

      let character = characters[index];
      match character {
        '0'..='9' => {
          numbers.push(character);
        }
        _ => break,
      }
      index += 1;
    }

    if numbers.is_empty() {
      // not valid
      // needs a number
      return LineType::Paragraph;
    }
    if numbers.len() > 9 {
      // not valid
      // too many numbers
      return LineType::Paragraph;
    }

    // need one dot
    let mut dot = false;
    if index < characters.len() {
      let character = characters[index];

      if character == '.' {
        dot = true;
      }

      if character == ')' {
        dot = true;
      }

      index += 1;
    }

    if !dot {
      // not valid
      // needs a dot
      return LineType::Paragraph;
    }

    let mut space = false;
    if index < characters.len() {
      let character = characters[index];

      if character == ' ' {
        space = true;
      }

      index += 1;
    }

    if !space {
      // not valid
      // needs a space
      return LineType::Paragraph;
    }

    // all the above are valid to make it here

    let number_str: String = numbers.into_iter().collect();
    let number: i32 = number_str.parse().unwrap_or(0);

    LineType::OL(index, number)
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Classifies each `(line, should_match)` case with no exclusions and checks
  /// that the result equals `expected` exactly when `should_match` is true.
  fn check_cases(cases: &[(&str, bool)], expected: LineType) {
    for &(line, should_match) in cases {
      let actual = block_line_type(line, &[]);
      if should_match {
        assert_eq!(actual, expected);
      } else {
        assert_ne!(actual, expected);
      }
    }
  }

  #[test]
  fn header() {
    // Any number of hashes counts, with or without leading spaces.
    check_cases(
      &[
        ("#", true),
        ("##", true),
        ("###", true),
        ("####", true),
        ("#####", true),
        ("######", true),
        ("#######", true),
        ("     #", true),
        ("    ##", true),
        ("   ###", true),
        ("  ####", true),
        (" #####", true),
        ("######", true),
      ],
      LineType::Header,
    );
  }

  #[test]
  fn hr() {
    // Fewer than four leading dashes, or a line that opens like a list item, is not a rule.
    check_cases(
      &[
        ("-", false),
        ("--", false),
        ("---", false),
        ("----", true),
        ("----   -", true),
        ("- ----   -", false),
        ("      ----   -", true),
      ],
      LineType::HorizontalRule,
    );
  }

  #[test]
  fn blockquote() {
    // Spoilers (`>!`) and indented `>` are not block quotes.
    check_cases(
      &[(">", true), (">>", true), (">>>", true), (">!", false), (" >", false)],
      LineType::BlockQuote,
    );
  }
}