/// Collapses whitespace in `s` while keeping paragraph breaks.
///
/// Rules, applied left to right:
/// * A newline that is followed by a second newline (optionally with other
///   whitespace in between) becomes exactly `"\n\n"`.
/// * Any other newline becomes a single space, unless the output is still
///   empty or already ends with a space.
/// * A run of non-newline whitespace becomes a single space, unless the
///   output already ends with a space or a newline.
/// * Every other character is copied through unchanged.
///
/// If the collapsed text consists only of whitespace, an empty `String` is
/// returned instead.
pub fn collapse_whitespace(s: &str) -> String {
    /// Index of the first character at or after `from` that is either a
    /// newline or not whitespace; `chars.len()` when there is none.
    fn skip_inline_whitespace(chars: &[char], from: usize) -> usize {
        chars[from..]
            .iter()
            .position(|&c| c == '\n' || !c.is_whitespace())
            .map_or(chars.len(), |offset| from + offset)
    }

    let input: Vec<char> = s.chars().collect();
    let mut out = String::new();
    let mut pos = 0;

    while let Some(&current) = input.get(pos) {
        if current == '\n' {
            // Look past any inline whitespace to see whether another newline follows.
            let next = skip_inline_whitespace(&input, pos + 1);
            if input.get(next) == Some(&'\n') {
                // Two newlines (possibly separated by blanks): paragraph break.
                out.push_str("\n\n");
                pos = next + 1;
            } else {
                // Lone newline: acts like a space; the blanks after it are dropped.
                if !(out.is_empty() || out.ends_with(' ')) {
                    out.push(' ');
                }
                pos = next;
            }
        } else if current.is_whitespace() {
            // Do not stack a space on top of an existing space or line break.
            if !(out.ends_with(' ') || out.ends_with('\n')) {
                out.push(' ');
            }
            pos = skip_inline_whitespace(&input, pos + 1);
        } else {
            out.push(current);
            pos += 1;
        }
    }

    if out.trim().is_empty() {
        String::new()
    } else {
        out
    }
}
/// Returns `s` with every leading `'\n'` removed.
///
/// Only line feeds are stripped; other whitespace (spaces, `'\r'`, tabs) at
/// the start of the string is left in place.
pub fn trim_leading_newlines(s: &str) -> &str {
    let mut rest = s;
    // Peel off one newline at a time until the front is something else.
    while let Some(tail) = rest.strip_prefix('\n') {
        rest = tail;
    }
    rest
}
/// Returns `s` with every trailing `'\n'` removed.
///
/// Only line feeds are stripped; other trailing whitespace (spaces, `'\r'`,
/// tabs) is left in place. Uses the standard-library trimming routine, the
/// mirror image of what `trim_leading_newlines` does at the front.
pub fn trim_trailing_newlines(s: &str) -> &str {
    s.trim_end_matches('\n')
}
/// Returns `s` with every leading and trailing `'\n'` removed.
///
/// Newlines in the interior of the string, and any other kind of
/// whitespace at either end, are kept.
pub fn trim_newlines(s: &str) -> &str {
    s.trim_matches('\n')
}
/// Builds a `String` made of `ch` repeated `count` times.
///
/// A `count` of zero yields an empty string.
pub fn repeat(ch: char, count: usize) -> String {
    std::iter::repeat(ch).take(count).collect()
}
/// Normalizes an optional attribute value into a single-line string.
///
/// Leading and trailing whitespace is dropped and every internal run of
/// whitespace (including newlines) is replaced by one space. `None`
/// produces an empty string.
pub fn clean_attribute(attribute: Option<&str>) -> String {
    attribute
        // `split_whitespace` already treats '\n' as a separator, so no
        // separate newline-to-space pass is needed before splitting.
        .map(|attr| attr.split_whitespace().collect::<Vec<_>>().join(" "))
        .unwrap_or_default()
}
/// Tag names that `is_block` reports as block elements.
///
/// Entries are stored upper-case; `is_block` compares against them ignoring
/// ASCII case, so callers may pass either `"div"` or `"DIV"`.
pub const BLOCK_ELEMENTS: &[&str] = &[
"ADDRESS",
"ARTICLE",
"ASIDE",
"AUDIO",
"BLOCKQUOTE",
"BODY",
"CANVAS",
"CENTER",
"DD",
"DIR",
"DIV",
"DL",
"DT",
"FIELDSET",
"FIGCAPTION",
"FIGURE",
"FOOTER",
"FORM",
"FRAMESET",
"H1",
"H2",
"H3",
"H4",
"H5",
"H6",
"HEADER",
"HGROUP",
"HR",
"HTML",
"ISINDEX",
"LI",
"MAIN",
"MENU",
"NAV",
"NOFRAMES",
"NOSCRIPT",
"OL",
"OUTPUT",
"P",
"PRE",
"SECTION",
"TABLE",
"TBODY",
"TD",
"TFOOT",
"TH",
"THEAD",
"TR",
"UL",
];
/// Tag names that `is_void` reports as void elements.
///
/// Entries are stored upper-case and matched ignoring ASCII case. Note that
/// `"HR"` is listed here as well as in `BLOCK_ELEMENTS`.
pub const VOID_ELEMENTS: &[&str] = &[
"AREA", "BASE", "BR", "COL", "COMMAND", "EMBED", "HR", "IMG", "INPUT", "KEYGEN", "LINK",
"META", "PARAM", "SOURCE", "TRACK", "WBR",
];
/// Tag names that `is_meaningful_when_blank` accepts.
///
/// Entries are stored upper-case and matched ignoring ASCII case.
/// NOTE(review): judging by the name, these are presumably elements that
/// should be kept even when they contain no text — confirm against callers.
pub const MEANINGFUL_WHEN_BLANK_ELEMENTS: &[&str] = &[
"A", "TABLE", "THEAD", "TBODY", "TFOOT", "TH", "TD", "IFRAME", "SCRIPT", "AUDIO", "VIDEO",
];
/// Returns `true` when `tag_name` equals an entry of `BLOCK_ELEMENTS`,
/// ignoring ASCII case.
pub fn is_block(tag_name: &str) -> bool {
is_in_list(tag_name, BLOCK_ELEMENTS)
}
/// Returns `true` when `tag_name` equals an entry of `VOID_ELEMENTS`,
/// ignoring ASCII case.
pub fn is_void(tag_name: &str) -> bool {
is_in_list(tag_name, VOID_ELEMENTS)
}
/// Returns `true` when `tag_name` equals an entry of
/// `MEANINGFUL_WHEN_BLANK_ELEMENTS`, ignoring ASCII case.
pub fn is_meaningful_when_blank(tag_name: &str) -> bool {
is_in_list(tag_name, MEANINGFUL_WHEN_BLANK_ELEMENTS)
}
/// Reports whether `s` equals any entry of `list`, ignoring ASCII case.
///
/// An empty `list` never matches.
fn is_in_list(s: &str, list: &[&str]) -> bool {
    for &candidate in list {
        if s.eq_ignore_ascii_case(candidate) {
            return true;
        }
    }
    false
}
/// A pair of whitespace strings: one for the leading side and one for the
/// trailing side.
#[derive(Clone, Debug)]
pub struct FlankingWhitespace {
    /// Whitespace recorded for the leading side.
    pub leading: String,
    /// Whitespace recorded for the trailing side.
    pub trailing: String,
}

impl FlankingWhitespace {
    /// Creates a value from the given `leading` and `trailing` strings,
    /// storing both unchanged.
    pub fn new(leading: String, trailing: String) -> Self {
        Self { leading, trailing }
    }
}
pub fn is_tracking_image(
src: &str,
alt: &str,
tracking_regex: Option<®ex::Regex>,
strip_without_alt: bool,
) -> bool {
if strip_without_alt && alt.trim().is_empty() {
return true;
}
if let Some(regex) = tracking_regex {
if regex.is_match(src) {
return true;
}
}
false
}
// Unit tests for the string helpers and tag predicates defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Leading newlines are stripped; input without them comes back unchanged.
#[test]
fn test_trim_leading_newlines() {
assert_eq!(trim_leading_newlines("\n\ntest"), "test");
assert_eq!(trim_leading_newlines("test"), "test");
}
// Trailing newlines are stripped; input without them comes back unchanged.
#[test]
fn test_trim_trailing_newlines() {
assert_eq!(trim_trailing_newlines("test\n\n"), "test");
assert_eq!(trim_trailing_newlines("test"), "test");
}
// The character is repeated exactly `count` times.
#[test]
fn test_repeat() {
assert_eq!(repeat('#', 3), "###");
assert_eq!(repeat('=', 2), "==");
}
// Block lookup is case-insensitive and rejects tags that are not listed.
#[test]
fn test_is_block() {
assert!(is_block("div"));
assert!(is_block("DIV"));
assert!(!is_block("span"));
}
// Void lookup is case-insensitive and rejects tags that are not listed.
#[test]
fn test_is_void() {
assert!(is_void("br"));
assert!(is_void("BR"));
assert!(!is_void("div"));
}
// Surrounding whitespace is removed; a missing attribute yields "".
#[test]
fn test_clean_attribute() {
assert_eq!(clean_attribute(Some(" hello world ")), "hello world");
assert_eq!(clean_attribute(None), "");
}
// A single space on each side of a word is kept as-is.
#[test]
fn test_collapse_whitespace_simple() {
assert_eq!(collapse_whitespace(" and "), " and ");
}
// A lone newline between words turns into one space.
#[test]
fn test_collapse_whitespace_newline() {
assert_eq!(collapse_whitespace("text\nmore"), "text more");
}
// A newline with spaces on both sides still collapses to one space.
#[test]
fn test_collapse_whitespace_with_spaces() {
assert_eq!(collapse_whitespace("text \n more"), "text more");
}
// Two consecutive newlines are kept as a paragraph break.
#[test]
fn test_collapse_whitespace_double_newline() {
assert_eq!(collapse_whitespace("para1\n\npara2"), "para1\n\npara2");
}
// Leading and trailing spaces are not trimmed away.
#[test]
fn test_collapse_whitespace_leading_trailing() {
assert_eq!(collapse_whitespace(" text "), " text ");
}
}