extern crate once_cell;
extern crate regex;
extern crate trim_in_place;
use std::borrow::Cow;
use std::fmt::Write;
use once_cell::sync::Lazy;
use regex::Regex;
use trim_in_place::TrimInPlace;
/// Expands to a string literal holding a `|`-separated regex alternation of
/// HTML block-level tag names, deliberately leaving out `p`.
///
/// The literal is meant to be spliced into larger patterns with `concat!`;
/// it carries no grouping of its own, so callers must wrap it in a group.
macro_rules! all_blocks_tag_names_except_p {
() => {
"table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary"
}
}
/// Expands to the alternation produced by `all_blocks_tag_names_except_p!`
/// with `|p` appended, i.e. every block-level tag name including `p`.
macro_rules! all_blocks_tag_names {
() => {
concat!(all_blocks_tag_names_except_p!(), "|p")
};
}
/// Expands to a `|`-separated regex alternation of the tag names
/// `textarea`, `script`, `style` and `svg`.
///
/// These are the non-block elements whose inner content `auto_p` masks out
/// before processing and restores afterwards.
macro_rules! all_preserved_tag_names {
() => {
"textarea|script|style|svg"
};
}
/// Expands to the block-level tag alternation (including `p`) joined with the
/// preserved tag alternation by a `|`.
macro_rules! all_block_and_preserved_tag_names {
() => {
concat!(all_blocks_tag_names!(), "|", all_preserved_tag_names!())
};
}
/// Expands to a case-insensitive, non-capturing regex group (`(?i:...)`)
/// matching any block-level tag name other than `p`.
macro_rules! pattern_all_blocks_except_p {
() => {
concat!("(?i:", all_blocks_tag_names_except_p!(), ")")
};
}
/// Expands to a case-insensitive, non-capturing regex group (`(?i:...)`)
/// matching any block-level tag name, `p` included.
macro_rules! pattern_all_blocks {
() => {
concat!("(?i:", all_blocks_tag_names!(), ")")
};
}
/// Expands to a case-insensitive, non-capturing regex group (`(?i:...)`)
/// matching any block-level tag name (including `p`) or any preserved tag
/// name (`textarea`, `script`, `style`, `svg`).
macro_rules! pattern_all_block_and_preserved_tag_names {
() => {
concat!("(?i:", all_block_and_preserved_tag_names!(), ")")
};
}
/// Expands to a regex fragment matching the attribute list of an HTML tag.
///
/// The fragment matches zero or more attributes followed by optional trailing
/// whitespace. Each attribute is introduced by at least one whitespace
/// character and has a name made of characters other than `<`, `>`, `=` and
/// whitespace. A name may be followed by `=` and a value that is either
/// empty, unquoted (neither starting nor ending with a quote character and
/// containing no whitespace or angle brackets), double-quoted, or
/// single-quoted.
///
/// The fragment contains no capturing groups, so it does not shift the group
/// numbering of the patterns it is spliced into.
macro_rules! pattern_attributes {
() => {
"(?:\\s+[^<>\\s=]+(?:=(?:|(?:[^'\"])|(?:[^'\"][^\\s<>]*[^'\"])|(?:\"[^\"]*\")|(?:'[^']*')))?)*\\s*"
}
}
/// Matches a whole `<pre>` element, case-insensitively.
///
/// Capture groups: 1 = start tag (with attributes), 2 = inner content
/// (matched lazily, so it stops at the first end tag), 3 = end tag.
static RE_PRE_ELEMENT: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", "(<pre", pattern_attributes!(), r">)([\s\S]*?)(</pre\s*>)")).unwrap()
});
/// Matches a whole `<textarea>` element, case-insensitively.
///
/// Capture groups: 1 = start tag (with attributes), 2 = inner content
/// (matched lazily, so it stops at the first end tag), 3 = end tag.
static RE_TEXTAREA_ELEMENT: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!(
"(?i)",
"(<textarea",
pattern_attributes!(),
r">)([\s\S]*?)(</textarea\s*>)"
))
.unwrap()
});
/// Matches a whole `<script>` element, case-insensitively.
///
/// Capture groups: 1 = start tag (with attributes), 2 = inner content
/// (matched lazily, so it stops at the first end tag), 3 = end tag.
static RE_SCRIPT_ELEMENT: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", "(<script", pattern_attributes!(), r">)([\s\S]*?)(</script\s*>)"))
.unwrap()
});
/// Matches a whole `<style>` element, case-insensitively.
///
/// Capture groups: 1 = start tag (with attributes), 2 = inner content
/// (matched lazily, so it stops at the first end tag), 3 = end tag.
static RE_STYLE_ELEMENT: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", "(<style", pattern_attributes!(), r">)([\s\S]*?)(</style\s*>)"))
.unwrap()
});
/// Matches a whole `<svg>` element, case-insensitively.
///
/// Capture groups: 1 = start tag (with attributes), 2 = inner content
/// (matched lazily, so it stops at the first end tag), 3 = end tag.
static RE_SVG_ELEMENT: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", "(<svg", pattern_attributes!(), r">)([\s\S]*?)(</svg\s*>)")).unwrap()
});
/// Matches a `<br>` tag in any letter case, with optional whitespace and an
/// optional self-closing slash (`<br>`, `<br/>`, `<br />`).
static RE_BR_ELEMENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)<br\s*/?>").unwrap());
/// Matches an opening, closing or self-closing tag.
///
/// Capture group 1 is the attribute portion of the tag (possibly empty);
/// `auto_p` scans that group for newlines that sit inside a tag.
static RE_TAG: Lazy<Regex> =
Lazy::new(|| Regex::new(concat!(r"</?[^\s<]+(", pattern_attributes!(), r")/?>")).unwrap());
/// Matches the non-`\n` line endings: `\r\n`, or a lone `\r`.
static RE_OTHER_NEWLINE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?:\r\n|\r)").unwrap());
/// Matches a run of two or more consecutive `\n` characters; `auto_p` splits
/// the text on these runs to obtain the individual paragraphs.
static RE_AT_LEAST_TWO_NEWLINES: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n\n+").unwrap());
/// Matches a `<p>` element that contains nothing but optional whitespace.
/// This pattern is case-sensitive (lowercase `<p>`/`</p>` only).
static RE_EMPTY_PARAGRAPH: Lazy<Regex> = Lazy::new(|| Regex::new(r"<p>\s*</p>").unwrap());
/// Matches a non-`p` block start tag followed by tag-free text and a `</p>`
/// that has no matching `<p>` in between.
///
/// Capture groups: 1 = the block start tag, 2 = whitespace after the tag,
/// 3 = the text (no `<` characters) preceding `</p>`.
static RE_P_END_TAG_MISSING_START: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!(
"(?i)",
r"(<",
pattern_all_blocks_except_p!(),
pattern_attributes!(),
r">)(\s*)([^<]+)</p>"
))
.unwrap()
});
/// Matches a `<p>` followed by tag-free text that runs straight into the end
/// tag of a non-`p` block element, i.e. a paragraph missing its `</p>`.
///
/// Capture groups: 1 = the text (no `<` characters) after `<p>`,
/// 2 = whitespace before the block end tag, 3 = the block end tag.
static RE_P_START_TAG_MISSING_END: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", r"<p>([^<]+)(\s*)(</", pattern_all_blocks_except_p!(), r"\s*>)"))
.unwrap()
});
/// Matches a `<p>` that is immediately followed by an `<li>` start tag, up to
/// a closing `</p>`.
///
/// Capture group 1 is everything between `<p>` and `</p>`. The inner
/// `[\s\S]*` is greedy, so the match extends to the last `</p>` available.
static RE_LI_IN_PARAGRAPH: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", r"<p>(<li", pattern_attributes!(), r">[\s\S]*)</p>")).unwrap()
});
/// Matches a `<p>` that is directly followed by the start or end tag of a
/// block-level or preserved element.
///
/// Capture group 1 is that tag; replacing the match with `$1` drops the
/// stray `<p>`.
static RE_BLOCK_AND_PRESERVED_TAG_AFTER_P_START_TAG: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!(
"(?i)",
r"<p>(</?",
pattern_all_block_and_preserved_tag_names!(),
pattern_attributes!(),
r">)"
))
.unwrap()
});
/// Matches the start or end tag of a block-level or preserved element that is
/// directly followed by `</p>`.
///
/// Capture group 1 is that tag; replacing the match with `$1` drops the
/// stray `</p>`.
static RE_BLOCK_AND_PRESERVED_TAG_BEFORE_P_END_TAG: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!(
"(?i)",
r"(</?",
pattern_all_block_and_preserved_tag_names!(),
pattern_attributes!(),
r">)</p>"
))
.unwrap()
});
/// Matches a block-level start or end tag that is directly followed by
/// `<br>` and a newline.
///
/// Capture group 1 is the block tag.
static RE_BR_ELEMENT_AFTER_BLOCK_TAG: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", r"(</?", pattern_all_blocks!(), pattern_attributes!(), r">)<br>\n"))
.unwrap()
});
/// Matches `<br>` plus a newline that is directly followed by a block-level
/// start or end tag.
///
/// Capture group 1 is the block tag.
static RE_BR_ELEMENT_BEFORE_BLOCK_TAG: Lazy<Regex> = Lazy::new(|| {
Regex::new(concat!("(?i)", r"<br>\n(</?", pattern_all_blocks!(), pattern_attributes!(), r">)"))
.unwrap()
});
/// Wraps blank-line-separated chunks of `pee` in `<p>` elements.
///
/// Processing steps:
///
/// 1. Trim the input; an empty result is returned as-is.
/// 2. Mask the inner content of `<pre>`, `<textarea>`, `<script>`, `<style>`
///    and `<svg>` elements with `0` bytes so later steps cannot touch it.
/// 3. Normalise `\r\n` / `\r` to `\n`, and temporarily turn newlines inside
///    tag attribute lists into `\r` so they do not split paragraphs.
/// 4. Split on runs of two or more newlines, wrap each chunk in `<p>…</p>`,
///    then clean up empty paragraphs and `<p>` tags misplaced around block
///    elements.
/// 5. If `br` is true, turn the remaining single newlines into `<br>`
///    (except directly next to block tags).
/// 6. Restore the masked contents and turn the temporary `\r` back into `\n`.
///
/// # Parameters
///
/// * `pee` - the text/HTML to process.
/// * `br` - when `true`, remaining line breaks inside paragraphs are
///   converted to `<br>`.
/// * `esc_pre` - when `true`, the restored content of `<pre>` elements is
///   escaped with `html_escape::encode_safe`; otherwise it is restored
///   verbatim.
///
/// # Returns
///
/// The processed HTML as an owned `String`.
pub fn auto_p<S: Into<String>>(pee: S, br: bool, esc_pre: bool) -> String {
    let mut pee = pee.into();

    pee.trim_in_place();

    if pee.is_empty() {
        return pee;
    }

    // Each entry is (original inner HTML, start byte offset, end byte offset).
    let mut pre_inner_html_buffer: Vec<(String, usize, usize)> = Vec::new();
    let mut script_inner_html_buffer: Vec<(String, usize, usize)> = Vec::new();
    let mut style_inner_html_buffer: Vec<(String, usize, usize)> = Vec::new();
    let mut textarea_inner_html_buffer: Vec<(String, usize, usize)> = Vec::new();
    let mut svg_inner_html_buffer: Vec<(String, usize, usize)> = Vec::new();

    {
        /// Saves the inner content of every element matched by `regex` into
        /// `buffer` and overwrites that content in `pee` with `0` bytes.
        fn reserve(pee: &mut String, regex: &Regex, buffer: &mut Vec<(String, usize, usize)>) {
            for captures in regex.captures_iter(pee) {
                let m = captures.get(2).unwrap();

                buffer.push((String::from(m.as_str()), m.start(), m.end()));
            }

            // SAFETY: every `start..end` range comes from a regex capture, so
            // it lies on char boundaries, and the whole range is overwritten
            // with the ASCII byte `0`; the string therefore stays valid UTF-8
            // and keeps its length.
            let bytes = unsafe { pee.as_mut_vec() };

            for (_, start, end) in buffer.iter() {
                for e in bytes[*start..*end].iter_mut() {
                    *e = b'0';
                }
            }
        }

        // `<pre>` goes first so that preserved-looking tags nested inside it
        // are already masked when the other patterns run.
        reserve(&mut pee, &*RE_PRE_ELEMENT, &mut pre_inner_html_buffer);
        reserve(&mut pee, &*RE_TEXTAREA_ELEMENT, &mut textarea_inner_html_buffer);
        reserve(&mut pee, &*RE_SCRIPT_ELEMENT, &mut script_inner_html_buffer);
        reserve(&mut pee, &*RE_STYLE_ELEMENT, &mut style_inner_html_buffer);
        reserve(&mut pee, &*RE_SVG_ELEMENT, &mut svg_inner_html_buffer);
    }

    // Normalise every line ending to `\n`. The `Cow::Borrowed` arm hands back
    // the original string so nothing is cloned when no replacement happened.
    let mut pee = match RE_OTHER_NEWLINE.replace_all(&pee, "\n") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    {
        // Newlines inside a tag's attribute list must not split paragraphs or
        // become `<br>`; park them as `\r` until the very end.
        let mut newlines_in_tags: Vec<usize> = Vec::new();

        for captures in RE_TAG.captures_iter(&pee) {
            let m = captures.get(1).unwrap();
            let start = m.start();

            for (i, e) in m.as_str().bytes().enumerate() {
                if e == b'\n' {
                    newlines_in_tags.push(i + start);
                }
            }
        }

        // SAFETY: each index points at an ASCII `\n` byte, which is replaced
        // by the ASCII byte `\r`; the string stays valid UTF-8.
        let bytes = unsafe { pee.as_mut_vec() };

        for newline_index in newlines_in_tags {
            bytes[newline_index] = b'\r';
        }
    }

    // Split into paragraphs on blank lines and wrap each one in `<p>`.
    let pees = RE_AT_LEAST_TWO_NEWLINES.split(&pee);

    let mut pee = String::with_capacity(pee.len());

    for tinkle in pees {
        pee.write_fmt(format_args!("<p>{}</p>\n", tinkle.trim())).unwrap();
    }

    // Drop paragraphs that contain only whitespace.
    let mut pee = match RE_EMPTY_PARAGRAPH.replace_all(&pee, "") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    pee.trim_matches_in_place('\n');

    // Add the `<p>` that is missing in front of text following a block start tag.
    let pee = match RE_P_END_TAG_MISSING_START.replace_all(&pee, "$1$2<p>$3</p>") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    // Add the `</p>` that is missing before a block end tag.
    let pee = match RE_P_START_TAG_MISSING_END.replace_all(&pee, "<p>$1</p>$2$3") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    // List items must not be wrapped in a paragraph.
    let pee = match RE_LI_IN_PARAGRAPH.replace_all(&pee, "$1") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    // Remove a `<p>` placed directly before a block/preserved tag ...
    let pee = match RE_BLOCK_AND_PRESERVED_TAG_AFTER_P_START_TAG.replace_all(&pee, "$1") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    // ... and a `</p>` placed directly after one.
    let pee = match RE_BLOCK_AND_PRESERVED_TAG_BEFORE_P_END_TAG.replace_all(&pee, "$1") {
        Cow::Owned(pee) => pee,
        Cow::Borrowed(_) => pee,
    };

    let mut pee = if br {
        // Normalise every `<br>` variant to the plain `<br>` form first.
        let mut pee = match RE_BR_ELEMENT.replace_all(&pee, "<br>") {
            Cow::Owned(pee) => pee,
            Cow::Borrowed(_) => pee,
        };

        // Ranges of trailing whitespace (before a newline) to replace by `<br>`.
        let mut v = Vec::new();

        {
            let bytes = pee.as_bytes();

            // Walk backwards so the collected ranges are ordered from the end
            // of the string to the start; replacing them in that order keeps
            // the not-yet-processed offsets valid.
            let mut p = bytes.len();

            loop {
                if p == 0 {
                    break;
                }

                p -= 1;

                let e = bytes[p];

                if e == b'\n' {
                    // Find the last non-whitespace byte before this newline.
                    let mut pp = p;

                    loop {
                        if pp == 0 {
                            break;
                        }

                        pp -= 1;

                        let e = bytes[pp];

                        if !e.is_ascii_whitespace() {
                            break;
                        }
                    }

                    // Skip lines that already end with `<br>`.
                    if pp < 3 || &bytes[(pp - 3)..=pp] != b"<br>" {
                        v.push((pp + 1)..p);
                    }

                    p = pp;
                }
            }
        }

        for range in v.into_iter() {
            pee.replace_range(range, "<br>");
        }

        // A `<br>` directly after or before a block tag is unwanted.
        let pee = match RE_BR_ELEMENT_AFTER_BLOCK_TAG.replace_all(&pee, "$1\n") {
            Cow::Owned(pee) => pee,
            Cow::Borrowed(_) => pee,
        };

        let pee = match RE_BR_ELEMENT_BEFORE_BLOCK_TAG.replace_all(&pee, "\n$1") {
            Cow::Owned(pee) => pee,
            Cow::Borrowed(_) => pee,
        };

        pee
    } else {
        pee
    };

    {
        /// Puts the saved inner contents in `buffer` back into the elements
        /// matched by `regex`, pairing matches and entries in document order.
        fn recover(pee: &mut String, regex: &Regex, buffer: &[(String, usize, usize)]) {
            let mut v = Vec::with_capacity(buffer.len());

            for (captures, inner_html) in regex.captures_iter(pee).zip(buffer.iter()) {
                let m = captures.get(2).unwrap();

                v.push((m.start()..m.end(), inner_html.0.as_str()));
            }

            // Replace back-to-front so earlier ranges are not shifted.
            for (range, inner_html) in v.into_iter().rev() {
                pee.replace_range(range, inner_html);
            }
        }

        // Restore in the reverse of the masking order.
        recover(&mut pee, &*RE_SVG_ELEMENT, &svg_inner_html_buffer);
        recover(&mut pee, &*RE_STYLE_ELEMENT, &style_inner_html_buffer);
        recover(&mut pee, &*RE_SCRIPT_ELEMENT, &script_inner_html_buffer);
        // Textarea contents must come from the textarea buffer (not the SVG
        // one), otherwise they stay masked or receive SVG markup.
        recover(&mut pee, &*RE_TEXTAREA_ELEMENT, &textarea_inner_html_buffer);

        if esc_pre {
            let mut v = Vec::with_capacity(pre_inner_html_buffer.len());

            for (captures, inner_html) in
                RE_PRE_ELEMENT.captures_iter(pee.as_str()).zip(pre_inner_html_buffer.iter())
            {
                let m = captures.get(2).unwrap();

                v.push((m.start()..m.end(), inner_html.0.as_str()));
            }

            for (range, inner_html) in v.into_iter().rev() {
                pee.replace_range(range, html_escape::encode_safe(inner_html).as_ref());
            }
        } else {
            recover(&mut pee, &*RE_PRE_ELEMENT, &pre_inner_html_buffer);
        }
    }

    {
        // Turn the parked in-tag newlines back into real newlines.
        // SAFETY: only ASCII `\r` bytes are replaced, each by the ASCII byte
        // `\n`; the string stays valid UTF-8.
        let bytes = unsafe { pee.as_mut_vec() };

        for e in bytes {
            if *e == b'\r' {
                *e = b'\n';
            }
        }
    }

    pee
}