use split::graphemes;
/// Strips a single leading byte order mark (U+FEFF) from `subject`.
///
/// Only the very first code point is inspected: a BOM that appears later in
/// the text, or a second BOM directly after the first one, is kept. An empty
/// `subject` yields an empty `String`.
///
/// # Arguments
///
/// * `subject` - The text that may start with a byte order mark.
///
/// # Returns
///
/// An owned copy of `subject` without its leading BOM, or an unchanged copy
/// when there is no leading BOM.
pub fn strip_bom(subject: &str) -> String {
    // `strip_prefix` removes the prefix at most once and returns `None` when
    // it is absent, in which case the input is copied unchanged.
    subject
        .strip_prefix('\u{FEFF}')
        .unwrap_or(subject)
        .to_string()
}
/// Returns `subject` with its markup removed by `strip_html_tags`.
///
/// An empty `subject` short-circuits to an empty `String` without running the
/// tag parser.
///
/// # Arguments
///
/// * `subject` - The text to strip tags from.
pub fn strip_tags(subject: &str) -> String {
    if subject.is_empty() {
        return String::new();
    }
    strip_html_tags(subject)
}
/// Parser state used by `strip_html_tags` while walking the input.
///
/// `Debug` and `Eq` are derived so states can be printed in diagnostics and
/// compared with `assert_eq!`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum StateMode {
    /// Outside any markup: graphemes seen in this state are copied to the result.
    Output,
    /// Inside a tag opened by `<`: graphemes are dropped until the closing `>`.
    Html,
    /// Inside a `<!` declaration whose kind (comment or doctype) is not yet known.
    Exclamation,
    /// Inside a comment opened by `!--`: graphemes are dropped until `-->`.
    Comment,
}
/// Joins the graphemes of `subject` at positions `start..end` into a new `String`.
///
/// Positions count the items returned by `graphemes`, not bytes. `end` is
/// exclusive.
///
/// # Panics
///
/// The range indexing panics when `start > end` or when `end` is larger than
/// the number of graphemes in `subject`.
fn unicode_string_range(subject: &str, start: usize, end: usize) -> String {
    let clusters = graphemes(subject);
    clusters[start..end].concat()
}
fn strip_html_tags(subject: &str) -> String {
let length = subject.len();
let mut state: StateMode = StateMode::Output;
let mut depth = 0;
let mut output = String::with_capacity(length);
let mut quote = String::with_capacity(4);
let g = graphemes(subject);
let g_length = g.len();
for (i, c) in g.iter().enumerate() {
let mut advance = false;
match *c {
"<" => {
if !quote.is_empty() {
} else if i + 2 < g_length
&& crate::query::query(
unicode_string_range(subject, i, i + 2).as_str(),
"< ",
0,
)
{
advance = true;
} else if state == StateMode::Output {
advance = true;
state = StateMode::Html;
} else if state == StateMode::Html {
depth += 1;
} else {
advance = true;
}
}
"!" => {
if state == StateMode::Html
&& i + 2 < g_length
&& crate::query::query(
unicode_string_range(subject, i, i + 2).as_str(),
"<!",
0,
)
{
state = StateMode::Exclamation;
} else {
advance = true;
}
}
"-" => {
if state == StateMode::Exclamation
&& i + 3 < g_length
&& crate::query::query(
unicode_string_range(subject, i, i + 3).as_str(),
"!--",
0,
)
{
state = StateMode::Comment;
} else {
advance = true;
}
}
"\"" | "'" => {
if state == StateMode::Html {
let c_copy = (*c).to_string();
if quote.as_str() == c_copy {
quote = String::from("");
} else if quote.is_empty() {
quote = c_copy;
}
} else {
advance = true;
}
}
"E" | "e" => {
if state == StateMode::Exclamation
&& i + 7 < g_length
&& crate::query::query(
unicode_string_range(subject, i, i + 7).as_str(),
"doctype",
0,
)
{
state = StateMode::Html;
} else {
advance = true;
}
}
">" => {
if depth > 0 {
depth -= 1;
} else if !quote.is_empty() {
} else if state == StateMode::Html
|| state == StateMode::Exclamation
|| state == StateMode::Comment
&& i + 3 < g_length
&& crate::query::query(
unicode_string_range(subject, i, i + 3).as_str(),
"-->",
0,
)
{
quote = String::from("");
state = StateMode::Output;
} else {
advance = true;
}
}
_ => {
advance = true;
}
}
if advance {
match state {
StateMode::Output => {
output.push_str(c);
}
StateMode::Html => {}
_ => {}
}
}
}
output
}