use daachorse::{CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder, MatchKind};
use markdown::{to_html_with_options, Options};
use rany::RANY_URL_ID as rany;
/// Double-quote character; registered in [`ATTR`] as the pattern that ends an
/// attribute value opened by `src="` or `href="`.
pub const QUOTE: &str = "\"";
/// Opening `<code>` tag: registered in [`ATTR`] as the start of a protected
/// code span, and also used to wrap every code placeholder in the output.
pub const CODE_BEGIN: &str = "<code>";
/// Closing `</code>` tag, the counterpart of [`CODE_BEGIN`].
pub const CODE_END: &str = "</code>";
/// Token kinds reported by the [`ATTR`] automaton.
///
/// `md_htm_new` also reuses these values as its scanner state: `Normal` means
/// "outside any protected span", and the opener variants (`Attr`, `CodeBegin`,
/// `VarBegin`, `MathBegin`, `Dollar`) mean "inside a span waiting for its closer".
#[derive(PartialEq, Debug, Clone, Copy)]
pub enum State {
/// `src="` or `href="` was matched; as a scanner state: inside an attribute value.
Attr,
/// A `"` was matched.
Quote,
/// Scanner state only (no pattern maps to it): outside any protected span.
Normal,
/// `<code>` was matched.
CodeBegin,
/// `</code>` was matched.
CodeEnd,
/// `${` was matched.
VarBegin,
/// `}` was matched.
VarEnd,
/// `` $` `` was matched.
MathBegin,
/// `` `$ `` was matched.
MathEnd,
/// A single space was matched.
Space,
/// A lone `$` was matched (one not consumed by `${` or `` $` ``).
Dollar,
}
/// Aho-Corasick automaton that tokenizes rendered HTML into [`State`] values.
///
/// Built with `MatchKind::LeftmostFirst`, so the order of the pattern list is
/// significant: the two-character openers `${` and `` $` `` are listed before
/// the bare `$` (presumably so they win when both could match at the same
/// position — confirm against the daachorse `MatchKind` documentation).
///
/// The trailing `unwrap()` panics during initialization if the automaton
/// cannot be built from the pattern list.
#[static_init::dynamic]
pub static ATTR: CharwiseDoubleArrayAhoCorasick<State> =
CharwiseDoubleArrayAhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build_with_values([
// URL-carrying attributes: the value up to the next `"` is extracted.
("src=\"", State::Attr),
("href=\"", State::Attr),
(QUOTE, State::Quote),
// Spans whose content is replaced by a placeholder.
(CODE_BEGIN, State::CodeBegin),
(CODE_END, State::CodeEnd),
("${", State::VarBegin),
("}", State::VarEnd),
("$`", State::MathBegin),
("`$", State::MathEnd),
(" ", State::Space),
("$", State::Dollar),
])
.unwrap();
/// Result of scanning an HTML fragment with `md_htm_new`: the fragment with
/// protected spans swapped for placeholders, plus the extracted originals.
#[derive(Debug, Clone, Default)]
pub struct Md {
/// The fragment with each attribute value replaced by a placeholder and each
/// code/variable/math span replaced by `<code>` + placeholder + `</code>`.
pub htm: String,
/// Original text of every protected span, delimiters included, in order of
/// appearance.
pub code: Vec<String>,
/// Indices into `code` of the entries that were delimited by
/// `<code>`…`</code>` (as opposed to `${…}`, `` $`…`$ `` or `$…$`).
pub code_pos: Vec<usize>,
/// Extracted `src="…"` / `href="…"` attribute values, in order of appearance.
pub attr: Vec<String>,
}
/// Outcome of `md_htm_new` / `md_htm`.
#[derive(Debug)]
pub enum MdHtm {
/// The fragment was scanned; protected spans were extracted into the [`Md`].
Md(Md),
/// The fragment was classified as raw HTML and is carried through unchanged.
Htm(String),
}
/// Scans an HTML fragment and replaces protected spans with placeholders.
///
/// The fragment is tokenized with [`ATTR`]. Two kinds of span are extracted:
///
/// * the value of every `src="…"` / `href="…"` attribute goes into `Md::attr`
///   and is replaced by `rany.e(index)`;
/// * every `<code>…</code>`, `${…}`, `` $`…`$ `` and `$…$` span (delimiters
///   included) goes into `Md::code` and is replaced by
///   `<code>` + `rany.e(index)` + `</code>`. Spans that were real
///   `<code>` elements additionally have their index recorded in `Md::code_pos`.
///
/// `index` is always the position the extracted text occupies in its vector,
/// so every placeholder has exactly one backing entry. This also holds for an
/// empty attribute value such as `href=""`, which is stored as an empty string.
///
/// An opener that never meets its closer leaves the scanner waiting; the text
/// from that opener onwards is then appended verbatim.
///
/// # Returns
///
/// * `Some(MdHtm::Htm(htm))` — the input starts with `<` and its third
///   character is alphabetic; it is handed back untouched.
///   NOTE(review): presumably this separates multi-letter tags from one-letter
///   inline tags such as `<a …>` / `<b>` — confirm the intent with the callers.
/// * `None` — the output consists solely of protected code spans (this
///   includes the empty input), i.e. nothing else is left in the fragment.
/// * `Some(MdHtm::Md(..))` — otherwise.
pub fn md_htm_new(htm: String) -> Option<MdHtm> {
    // `nth(2)` is `None` for anything shorter than three characters, so no
    // separate length check is needed.
    if htm.starts_with('<') && matches!(htm.chars().nth(2), Some(c) if c.is_alphabetic()) {
        return Some(MdHtm::Htm(htm));
    }

    // Output segments; joined at the end to form `Md::htm`.
    let mut li: Vec<String> = Vec::new();
    let mut code: Vec<String> = Vec::new();
    let mut code_pos: Vec<usize> = Vec::new();
    let mut attr: Vec<String> = Vec::new();
    // Byte offset of the first input byte not yet emitted or extracted.
    let mut pre = 0;
    let mut state = State::Normal;

    for found in ATTR.leftmost_find_iter(&htm) {
        let start = found.start();
        let end = found.end();
        let token = found.value();

        match state {
            State::Normal => match token {
                State::VarBegin | State::MathBegin | State::Dollar | State::CodeBegin => {
                    // Flush the plain text before the opener; the opener itself
                    // stays pending so it becomes part of the extracted span.
                    if pre != start {
                        li.push(htm[pre..start].into());
                    }
                    pre = start;
                    state = token;
                }
                State::Attr => {
                    // Keep `src="` / `href="` in the output; only the value
                    // that follows is extracted.
                    li.push(htm[pre..end].into());
                    pre = end;
                    state = token;
                }
                _ => {}
            },
            State::Attr => {
                if token == State::Quote {
                    li.push(rany.e(attr.len()));
                    // Pushed unconditionally: an empty value must still occupy
                    // the slot its placeholder refers to, otherwise the next
                    // attribute would be assigned the same index.
                    attr.push(htm[pre..start].into());
                    // The closing quote stays in the output.
                    pre = start;
                    state = State::Normal;
                }
            }
            State::Dollar | State::MathBegin | State::VarBegin | State::CodeBegin => {
                let closer = match state {
                    State::MathBegin => State::MathEnd,
                    State::VarBegin => State::VarEnd,
                    State::CodeBegin => State::CodeEnd,
                    _ => State::Dollar,
                };
                if token == closer {
                    if state == State::CodeBegin {
                        code_pos.push(code.len());
                    }
                    li.push(CODE_BEGIN.to_owned() + &rany.e(code.len()) + CODE_END);
                    // The extracted span runs from the opener through the closer.
                    code.push(htm[pre..end].into());
                    pre = end;
                    state = State::Normal;
                }
            }
            // These values are only ever produced as tokens; `state` is never
            // assigned any of them.
            State::Quote | State::CodeEnd | State::VarEnd | State::MathEnd | State::Space => {}
        }
    }

    if pre != htm.len() {
        li.push(htm[pre..].into());
    }
    // Every segment is a code placeholder: nothing else remains.
    if li.len() == code.len() {
        return None;
    }
    Some(MdHtm::Md(Md {
        htm: li.concat(),
        code,
        code_pos,
        attr,
    }))
}
/// Renders Markdown `h` to HTML and scans the result with [`md_htm_new`].
///
/// Rendering uses `Options::gfm()` with `allow_dangerous_html` and
/// `allow_dangerous_protocol` enabled. On the rendered output:
///
/// * if it is wrapped as `<p>…</p>`, the outer `<p>` and the final `</p>` are
///   removed; output that starts with `<p>` but does not end with `</p>` is
///   left as rendered instead of being truncated;
/// * every `" />"` is rewritten to `">"`.
///
/// If rendering fails, the error is logged via `tracing::error!` and the
/// input is passed on as `htmlize::escape_text(h)` instead.
///
/// # Returns
///
/// Whatever [`md_htm_new`] returns for the prepared HTML.
pub fn md_htm(h: &str) -> Option<MdHtm> {
    let mut conf = Options::gfm();
    conf.compile.allow_dangerous_html = true;
    conf.compile.allow_dangerous_protocol = true;

    let htm: String = match to_html_with_options(h, &conf) {
        Ok(rendered) => {
            // Only unwrap when both halves of the wrapper are really there;
            // slicing by fixed offsets would panic on a bare "<p>" and could
            // cut through a multi-byte character.
            let body = rendered
                .strip_prefix("<p>")
                .and_then(|rest| rest.strip_suffix("</p>"))
                .unwrap_or(rendered.as_str());
            body.replace(" />", ">")
        }
        Err(e) => {
            tracing::error!("{h}\n{:?}", e);
            htmlize::escape_text(h).into()
        }
    };
    md_htm_new(htm)
}