#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
use regex::Regex;
use std::borrow::Cow;
use std::sync::LazyLock;
/// Matches a single character from the "misc" escape set and captures it as
/// group 1: backslash, `&`, `<`, backtick, `[`, `]`, `>`, `~`, `#`, `=`, `+`,
/// `|` or `-`.
///
/// Compiled lazily on first use; the `expect` only fires if the hard-coded
/// pattern is invalid.
static ESCAPE_MISC_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"([\\&<`\[\]>~#=+|\-])").expect("valid regex pattern"));
/// Matches an ASCII digit (group 1) immediately followed by `.` or `)`
/// (group 2), i.e. text that could be read as an ordered-list marker.
///
/// Compiled lazily on first use; the `expect` only fires if the hard-coded
/// pattern is invalid.
static ESCAPE_NUMBERED_LIST_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"([0-9])([.)])").expect("valid regex pattern"));
/// Matches any single ASCII punctuation character and captures it as group 1.
///
/// `\x22` and `\x27` in the pattern are the double and single quote
/// characters, written as hex escapes. Compiled lazily on first use; the
/// `expect` only fires if the hard-coded pattern is invalid.
static ESCAPE_ASCII_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"([!\x22#$%&\x27()*+,\-./:;<=>?@\[\\\]^_`{|}~])").expect("valid regex pattern"));
/// Backslash-escapes characters in `text` according to the enabled flags.
///
/// * `escape_misc` – prefixes each backslash, `&`, `<`, backtick, `[`, `]`,
///   `>`, `~`, `#`, `=`, `+`, `|` and `-` with a backslash, then inserts a
///   backslash between an ASCII digit and a directly following `.` or `)`.
/// * `escape_asterisks` – replaces every `*` with `\*`.
/// * `escape_underscores` – replaces every `_` with `\_`.
/// * `escape_ascii` – prefixes every ASCII punctuation character with a
///   backslash. When set, the three other flags are ignored.
///
/// Returns `Cow::Borrowed` whenever nothing had to be changed, so callers only
/// pay for an allocation when at least one character was escaped.
#[allow(clippy::fn_params_excessive_bools)]
pub fn escape(
    text: &str,
    escape_misc: bool,
    escape_asterisks: bool,
    escape_underscores: bool,
    escape_ascii: bool,
) -> Cow<'_, str> {
    if text.is_empty() {
        return Cow::Borrowed("");
    }

    if escape_ascii {
        // ASCII mode supersedes every other flag. Skip the regex entirely when
        // the input contains no ASCII punctuation at all.
        if !text.bytes().any(|byte| byte.is_ascii_punctuation()) {
            return Cow::Borrowed(text);
        }
        return ESCAPE_ASCII_RE.replace_all(text, r"\$1");
    }

    if !(escape_misc || escape_asterisks || escape_underscores) {
        return Cow::Borrowed(text);
    }

    // Cheap byte scan for the misc-only configuration: if neither a misc
    // character nor a potential list delimiter is present, no regex can match.
    let misc_only = escape_misc && !escape_asterisks && !escape_underscores;
    if misc_only {
        let has_candidate = text.bytes().any(|byte| {
            matches!(
                byte,
                b'\\'
                    | b'&'
                    | b'<'
                    | b'`'
                    | b'['
                    | b']'
                    | b'>'
                    | b'~'
                    | b'#'
                    | b'='
                    | b'+'
                    | b'|'
                    | b'-'
                    | b'.'
                    | b')'
            )
        });
        if !has_candidate {
            return Cow::Borrowed(text);
        }
    }

    let mut escaped: Cow<'_, str> = Cow::Borrowed(text);

    if escape_misc {
        // Misc characters first (this also doubles pre-existing backslashes),
        // then the digit/delimiter pairs on the already escaped text.
        let misc = ESCAPE_MISC_RE.replace_all(text, r"\$1");
        escaped = match ESCAPE_NUMBERED_LIST_RE.replace_all(&misc, r"$1\$2") {
            Cow::Owned(numbered) => Cow::Owned(numbered),
            // No list marker found: keep the previous value (and its borrow).
            Cow::Borrowed(_) => misc,
        };
    }

    if escape_asterisks && escaped.contains('*') {
        escaped = Cow::Owned(escaped.replace('*', r"\*"));
    }

    if escape_underscores && escaped.contains('_') {
        escaped = Cow::Owned(escaped.replace('_', r"\_"));
    }

    escaped
}
/// Splits `text` into a leading-whitespace marker, a trailing-whitespace
/// marker and the trimmed content, returned as `(prefix, suffix, trimmed)`.
///
/// * `prefix` is `" "` when `text` starts with any whitespace character,
///   otherwise `""`.
/// * `suffix` is `"\n\n"` when `text` ends with a blank line (`"\n\n"` or
///   `"\r\n\r\n"`), `" "` when it ends with a space or a tab, otherwise `""`.
/// * `trimmed` is `text` with all leading and trailing whitespace removed.
///
/// An empty input yields three empty strings.
#[must_use]
pub fn chomp(text: &str) -> (&str, &str, &str) {
    if text.is_empty() {
        return ("", "", "");
    }

    let prefix = if text.starts_with(char::is_whitespace) {
        " "
    } else {
        ""
    };

    let suffix = if text.ends_with("\n\n") || text.ends_with("\r\n\r\n") {
        "\n\n"
    } else if text.ends_with([' ', '\t']) {
        " "
    } else {
        ""
    };

    // `trim` already removes every trailing newline and carriage return, so no
    // separate handling of the blank-line suffix is needed here.
    (prefix, suffix, text.trim())
}
/// Collapses every run of spaces, tabs and Unicode space characters (as
/// classified by `is_unicode_space`) into a single ASCII space.
///
/// All other characters, including line breaks, are copied through unchanged.
/// Always returns a freshly allocated `String`.
#[must_use]
pub fn normalize_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for ch in text.chars() {
        let is_space = matches!(ch, ' ' | '\t') || is_unicode_space(ch);
        if !is_space {
            collapsed.push(ch);
        } else if !collapsed.ends_with(' ') {
            // The output only ever ends in ' ' right after a space-like
            // character was emitted, so this check detects "inside a run".
            collapsed.push(' ');
        }
    }
    collapsed
}
/// Borrowing variant of `normalize_whitespace`.
///
/// Returns `text` unchanged (borrowed) when it is already normalized, i.e. the
/// only space-like characters it contains are single, non-adjacent ASCII
/// spaces. Otherwise delegates to `normalize_whitespace` and returns the owned
/// result.
#[must_use]
pub fn normalize_whitespace_cow(text: &str) -> Cow<'_, str> {
    let mut previous_was_space = false;
    let needs_rewrite = text.chars().any(|ch| {
        let is_space = matches!(ch, ' ' | '\t') || is_unicode_space(ch);
        // A space-like character forces a rewrite when it continues a run or
        // is anything other than a plain ASCII space.
        let offending = is_space && (previous_was_space || ch != ' ');
        previous_was_space = is_space;
        offending
    });

    if needs_rewrite {
        Cow::Owned(normalize_whitespace(text))
    } else {
        Cow::Borrowed(text)
    }
}
#[must_use]
pub fn decode_html_entities(text: &str) -> String {
html_escape::decode_html_entities(text).into_owned()
}
/// Decodes HTML entities in `text`, borrowing the input where possible.
///
/// Text without any `&` cannot contain an entity, so it is returned borrowed
/// without calling the decoder; otherwise the result of
/// `html_escape::decode_html_entities` is returned as-is.
#[must_use]
pub fn decode_html_entities_cow(text: &str) -> Cow<'_, str> {
    if text.contains('&') {
        html_escape::decode_html_entities(text)
    } else {
        Cow::Borrowed(text)
    }
}
/// Returns `true` for the non-ASCII space characters recognised by this
/// module: U+00A0, U+1680, U+2000 through U+200A, U+202F, U+205F and U+3000.
///
/// The ASCII space and tab are deliberately not part of this set; callers
/// check those separately.
const fn is_unicode_space(ch: char) -> bool {
    matches!(
        ch,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `escape` with `flags` (`[misc, asterisks, underscores, ascii]`)
    /// over every `(input, expected)` pair and asserts the output.
    fn check_escape(flags: [bool; 4], cases: &[(&str, &str)]) {
        let [misc, asterisks, underscores, ascii] = flags;
        for &(input, expected) in cases {
            assert_eq!(escape(input, misc, asterisks, underscores, ascii), expected);
        }
    }

    #[test]
    fn test_escape_misc() {
        check_escape(
            [true, false, false, false],
            &[
                ("foo & bar", r"foo \& bar"),
                ("foo [bar]", r"foo \[bar\]"),
                ("1. Item", r"1\. Item"),
                ("1) Item", r"1\) Item"),
            ],
        );
    }

    #[test]
    fn test_escape_asterisks() {
        check_escape(
            [false, true, false, false],
            &[("foo * bar", r"foo \* bar"), ("**bold**", r"\*\*bold\*\*")],
        );
    }

    #[test]
    fn test_escape_underscores() {
        check_escape(
            [false, false, true, false],
            &[("foo_bar", r"foo\_bar"), ("__bold__", r"\_\_bold\_\_")],
        );
    }

    #[test]
    fn test_escape_ascii() {
        check_escape(
            [false, false, false, true],
            &[
                (r##"!"#$%&"##, r#"\!\"\#\$\%\&"#),
                ("*+,-./", r"\*\+\,\-\.\/"),
                ("<=>?@", r"\<\=\>\?\@"),
                (r"[\]^_`", r"\[\\\]\^\_\`"),
                ("{|}~", r"\{\|\}\~"),
            ],
        );
    }

    #[test]
    fn test_chomp() {
        // Each entry is `(input, (prefix, suffix, trimmed))`.
        let cases: [(&str, (&str, &str, &str)); 5] = [
            (" text ", (" ", " ", "text")),
            ("text", ("", "", "text")),
            (" text", (" ", "", "text")),
            ("text ", ("", " ", "text")),
            ("", ("", "", "")),
        ];
        for (input, expected) in cases {
            assert_eq!(chomp(input), expected);
        }
    }
}