md_htm 0.1.0

markdown to html
Documentation
use daachorse::{CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder, MatchKind};
use markdown::{to_html_with_options, Options};
use rany::RANY_URL_ID as rany;

/// A single double-quote character; closes an attribute value opened by
/// `src="` or `href="`.
pub const QUOTE: &str = r#"""#;
/// Opening tag of an inline code span in the rendered HTML.
pub const CODE_BEGIN: &str = "<code>";
/// Closing tag of an inline code span in the rendered HTML.
pub const CODE_END: &str = "</code>";

/// Token kinds reported by the `ATTR` automaton; the same type doubles as the
/// scanner state inside `md_htm_new`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum State {
  /// Opening of an attribute value: `src="` or `href="`.
  Attr,
  /// A double quote (`"`).
  Quote,
  /// Plain text; the scanner's initial state.
  Normal,
  /// `<code>`.
  CodeBegin,
  /// `</code>`.
  CodeEnd,
  /// `${` — opening of a variable.
  VarBegin,
  /// `}` — closing of a variable.
  VarEnd,
  /// `` $` `` — opening of a math span.
  MathBegin,
  /// `` `$ `` — closing of a math span.
  MathEnd,
  /// A single space.
  Space,
  /// A lone `$`.
  Dollar,
}

// Multi-pattern matcher used by `md_htm_new` to find every delimiter it cares
// about in one pass; each pattern carries the `State` token it stands for.
//
// The match kind is `LeftmostFirst`, so the order of the entries below is
// significant: presumably, when several patterns match at the same position,
// the one listed earlier wins — which is why the bare `$` comes last, after
// `${` and `` $` `` (TODO confirm against the daachorse documentation).
//
// `.unwrap()` panics during static initialization if the automaton cannot be
// built from these patterns.
#[static_init::dynamic]
pub static ATTR: CharwiseDoubleArrayAhoCorasick<State> =
  CharwiseDoubleArrayAhoCorasickBuilder::new()
    .match_kind(MatchKind::LeftmostFirst)
    .build_with_values([
      // Openers of the two attribute values that get extracted.
      ("src=\"", State::Attr),
      ("href=\"", State::Attr),
      // Closes an attribute value.
      (QUOTE, State::Quote),
      // Inline code tags.
      (CODE_BEGIN, State::CodeBegin),
      (CODE_END, State::CodeEnd),
      // Variable: `${` … `}`.
      ("${", State::VarBegin),
      ("}", State::VarEnd),
      // Math delimited by `` $` `` … `` `$ ``.
      ("$`", State::MathBegin),
      ("`$", State::MathEnd),
      (" ", State::Space),
      // Lone dollar sign: opens and closes a `$…$` span.
      ("$", State::Dollar),
    ])
    .unwrap();

/// Tokenized form of a piece of rendered HTML, as produced by `md_htm_new`.
#[derive(Clone, Debug, Default)]
pub struct Md {
  /// The HTML with protected fragments swapped for placeholders.
  pub htm: String,
  /// Extracted code-like fragments (`<code>…</code>`, `${…}`, math spans),
  /// stored together with their delimiters.
  pub code: Vec<String>,
  /// Indices into `code` of the entries that came from `<code>…</code>`.
  /// When turning code back into Markdown these need a backtick (`) added;
  /// `${var}` entries do not.
  pub code_pos: Vec<usize>,
  /// Extracted values of `src="…"` / `href="…"` attributes.
  pub attr: Vec<String>,
}

/// Outcome of `md_htm_new`.
#[derive(Debug)]
pub enum MdHtm {
  /// The input was tokenized: text with placeholders plus the extracted
  /// code and attribute fragments.
  Md(Md),
  /// The input was recognised as raw HTML and is returned unchanged.
  Htm(String),
}

/// Splits rendered HTML into plain text plus protected fragments.
///
/// * If `htm` starts with `<` and its third character is alphabetic, it is
///   returned unchanged as [`MdHtm::Htm`].
/// * Otherwise the text is scanned with [`ATTR`]:
///   * `<code>…</code>`, `${…}`, `` $`…`$ `` and `$…$` spans are moved —
///     delimiters included — into `Md::code` and replaced by
///     `<code>` + `rany.e(index)` + `</code>`; the indices of real
///     `<code>` spans are additionally recorded in `Md::code_pos`;
///   * non-empty `src="…"` / `href="…"` values are moved into `Md::attr` and
///     replaced by `rany.e(index)`.
///   A span whose closing delimiter never appears is left in the text as is.
///
/// Returns `None` when the output would consist solely of code placeholders
/// (this includes empty input).
pub fn md_htm_new(htm: String) -> Option<MdHtm> {
  if htm.starts_with('<') && matches!(htm.chars().nth(2), Some(c) if c.is_alphabetic()) {
    return Some(MdHtm::Htm(htm));
  }

  // Output pieces, joined at the end; placeholders and literal text interleaved.
  let mut li = Vec::new();
  let mut code = Vec::new();
  let mut code_pos = Vec::new();
  let mut attr = Vec::new();

  // Byte offset up to which `htm` has already been consumed.
  let mut pre = 0;
  let mut state = State::Normal;

  for m in ATTR.leftmost_find_iter(&htm) {
    let mut start = m.start();
    let end = m.end();
    let s = m.value();

    // Moves `htm[pre..start]` into `$li` and leaves a placeholder carrying the
    // index of that entry. Nothing is emitted for an empty slice: a
    // placeholder without a stored value (e.g. for `href=""`) would point at
    // a missing entry and collide with the index of the next one.
    macro_rules! push {
      ($li:expr) => {
        if pre != start {
          li.push(rany.e($li.len()));
          $li.push(htm[pre..start].into());
        }
        pre = start;
        state = State::Normal;
      };
    }

    // Stores the whole span, closing delimiter included, and wraps the
    // placeholder in `<code>` tags. `pre` still points at the opening
    // delimiter, so the slice is never empty and `push!` always appends the
    // placeholder that is wrapped here.
    macro_rules! code {
      () => {
        start = end;
        push!(code);
        let pos = li.len() - 1;
        li[pos] = CODE_BEGIN.to_owned() + &li[pos] + CODE_END;
      };
    }

    match (state, s) {
      // A span closed by its matching delimiter.
      (State::Space | State::Dollar, State::Dollar)
      | (State::MathBegin, State::MathEnd)
      | (State::VarBegin, State::VarEnd) => {
        code!();
      }
      (State::CodeBegin, State::CodeEnd) => {
        // Remember which `code` entries are real `<code>` spans.
        code_pos.push(code.len());
        code!();
      }
      // End of an attribute value.
      (State::Attr, State::Quote) => {
        push!(attr);
      }
      // Start of a span: flush the text before it, keep the opener in the span.
      (
        State::Normal,
        State::VarBegin | State::MathBegin | State::Dollar | State::CodeBegin,
      ) => {
        if pre != start {
          li.push(htm[pre..start].into());
        }
        pre = start;
        state = s;
      }
      // Start of an attribute value: the `src="` / `href="` opener stays in the text.
      (State::Normal, State::Attr) => {
        li.push(htm[pre..end].into());
        pre = end;
        state = s;
      }
      // Every other token is ordinary text in the current state.
      _ => {}
    }
  }

  // Trailing text, including any span that was never closed.
  if pre != htm.len() {
    li.push(htm[pre..].into());
  }

  if li.len() == code.len() {
    return None;
  }

  Some(MdHtm::Md(Md {
    htm: li.join(""),
    code,
    code_pos,
    attr,
  }))
}

/// Renders the Markdown text `h` to HTML and tokenizes it with [`md_htm_new`].
///
/// Rendering uses the GFM options with `allow_dangerous_html` and
/// `allow_dangerous_protocol` enabled. When the rendered HTML both starts with
/// `<p>` and ends with `</p>`, that outer pair is removed; any other output is
/// left untouched. Every ` />` is then rewritten to `>`.
///
/// If rendering fails, the error is logged and the HTML-escaped input is
/// tokenized instead.
pub fn md_htm(h: &str) -> Option<MdHtm> {
  let mut conf = Options::gfm();
  conf.compile.allow_dangerous_html = true;
  conf.compile.allow_dangerous_protocol = true;

  let htm: String = match to_html_with_options(h, &conf) {
    Ok(r) => {
      // Only unwrap when both tags are really there. Slicing by fixed byte
      // offsets would cut arbitrary bytes off output that merely starts with
      // `<p>`, and could panic on a non-char-boundary or a too-short string.
      let inner = r
        .strip_prefix("<p>")
        .and_then(|rest| rest.strip_suffix("</p>"))
        .unwrap_or(r.as_str());
      inner.replace(" />", ">")
    }
    Err(e) => {
      tracing::error!("{h}\n{:?}", e);
      htmlize::escape_text(h).into()
    }
  };

  md_htm_new(htm)
}