use std::borrow::Cow;
#[inline]
pub fn escape(
text: &str,
escape_misc: bool,
escape_asterisks: bool,
escape_underscores: bool,
escape_ascii: bool,
) -> Cow<'_, str> {
if text.is_empty() {
return Cow::Borrowed("");
}
if !escape_misc && !escape_asterisks && !escape_underscores && !escape_ascii {
return Cow::Borrowed(text);
}
let mut result: Cow<'_, str> = Cow::Borrowed(text);
if escape_ascii {
return find_ascii_punctuation(text.as_bytes()).map_or(Cow::Borrowed(text), |first| {
Cow::Owned(escape_chars_from(text, first, is_ascii_punctuation))
});
}
if escape_misc && let Some(escaped) = escape_misc_and_numbered_markers(result.as_ref()) {
result = Cow::Owned(escaped);
}
if escape_asterisks && result.contains('*') {
result = Cow::Owned(result.replace('*', r"\*"));
}
if escape_underscores && result.contains('_') {
result = Cow::Owned(result.replace('_', r"\_"));
}
result
}
/// Returns the index of the first ASCII punctuation byte in `bytes`, or `None`
/// when there is none.
#[inline]
fn find_ascii_punctuation(bytes: &[u8]) -> Option<usize> {
    bytes.iter().position(u8::is_ascii_punctuation)
}
/// Copies `text`, inserting a backslash before every character from byte offset
/// `first_escape` onwards for which `should_escape` returns `true`.
///
/// Everything before `first_escape` is copied verbatim without consulting the
/// predicate. Panics if `first_escape` is past the end of `text` or does not lie
/// on a character boundary.
fn escape_chars_from(text: &str, first_escape: usize, should_escape: fn(char) -> bool) -> String {
    let (verbatim, tail) = text.split_at(first_escape);

    let mut out = String::with_capacity(text.len() + 1);
    out.push_str(verbatim);
    tail.chars().for_each(|c| {
        if should_escape(c) {
            out.push('\\');
        }
        out.push(c);
    });
    out
}
fn escape_misc_and_numbered_markers(text: &str) -> Option<String> {
let mut first_escape = None;
let mut prev_was_digit = false;
for (idx, ch) in text.char_indices() {
if is_misc_markdown_char(ch) || (prev_was_digit && matches!(ch, '.' | ')')) {
first_escape = Some(idx);
break;
}
prev_was_digit = ch.is_ascii_digit();
}
let first_escape = first_escape?;
let mut escaped = String::with_capacity(text.len() + 1);
escaped.push_str(&text[..first_escape]);
prev_was_digit = text[..first_escape]
.chars()
.next_back()
.is_some_and(|ch| ch.is_ascii_digit());
for ch in text[first_escape..].chars() {
if is_misc_markdown_char(ch) || (prev_was_digit && matches!(ch, '.' | ')')) {
escaped.push('\\');
}
escaped.push(ch);
prev_was_digit = ch.is_ascii_digit();
}
Some(escaped)
}
/// Returns `true` when `ch` belongs to the fixed "misc" escape set:
/// `\`, `&`, `<`, `>`, `` ` ``, `[`, `]`, `~`, `#`, `=`, `+`, `-` and `|`.
///
/// `*` and `_` are deliberately not part of this set.
const fn is_misc_markdown_char(ch: char) -> bool {
    matches!(
        ch,
        '\\' | '&'
            | '<' | '>'
            | '[' | ']'
            | '`' | '~'
            | '#' | '='
            | '+' | '-'
            | '|'
    )
}
/// By-value adapter around [`char::is_ascii_punctuation`].
///
/// The standard method takes `&char`; this wrapper has the `fn(char) -> bool`
/// shape, so it can be passed where a plain function pointer is expected.
const fn is_ascii_punctuation(ch: char) -> bool {
    char::is_ascii_punctuation(&ch)
}
/// Splits `text` into `(prefix, suffix, trimmed)`.
///
/// * `prefix` is `" "` when `text` starts with any whitespace character,
///   otherwise `""`.
/// * `suffix` is `"\n\n"` when `text` ends with a blank line (`"\n\n"` or
///   `"\r\n\r\n"`), `" "` when it ends with a space or tab, otherwise `""`.
/// * `trimmed` is `text` with all leading and trailing whitespace removed.
///
/// An empty input yields three empty strings.
#[must_use]
#[inline]
pub fn chomp(text: &str) -> (&str, &str, &str) {
    let prefix = match text.chars().next() {
        Some(first) if first.is_whitespace() => " ",
        _ => "",
    };

    let ends_with_blank_line = text.ends_with("\n\n") || text.ends_with("\r\n\r\n");
    let suffix = if ends_with_blank_line {
        "\n\n"
    } else if text.ends_with([' ', '\t']) {
        " "
    } else {
        ""
    };

    // `trim` strips every trailing whitespace character, which already covers
    // the newline sequences detected above.
    (prefix, suffix, text.trim())
}
/// Collapses every run of spaces, tabs and the Unicode spaces recognised by
/// `is_unicode_space` into a single ASCII space.
///
/// All other characters, line breaks included, are copied through unchanged.
#[must_use]
pub fn normalize_whitespace(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut in_run = false;

    for ch in text.chars() {
        let collapsible = matches!(ch, ' ' | '\t') || is_unicode_space(ch);
        if !collapsible {
            out.push(ch);
        } else if !in_run {
            // Only the first character of a run produces output.
            out.push(' ');
        }
        in_run = collapsible;
    }

    out
}
/// Borrowing variant of `normalize_whitespace`.
///
/// The input is returned as-is when it is already normalised, meaning its only
/// space-like characters are single ASCII spaces. Otherwise the text is rebuilt
/// through `normalize_whitespace` and returned owned.
#[must_use]
#[inline]
pub fn normalize_whitespace_cow(text: &str) -> Cow<'_, str> {
    let mut prev_was_space = false;
    // A space-like character is a problem if it extends a run or is anything
    // other than a plain ASCII space.
    let needs_rewrite = text.chars().any(|ch| {
        let is_space = matches!(ch, ' ' | '\t') || is_unicode_space(ch);
        let offending = is_space && (prev_was_space || ch != ' ');
        prev_was_space = is_space;
        offending
    });

    if needs_rewrite {
        Cow::Owned(normalize_whitespace(text))
    } else {
        Cow::Borrowed(text)
    }
}
/// Decodes HTML entities in `text` and returns the result as an owned `String`.
///
/// Delegates to `html_escape::decode_html_entities`; this always produces an
/// owned string, even when the decoder returned the input unchanged.
#[must_use]
pub fn decode_html_entities(text: &str) -> String {
    let decoded = html_escape::decode_html_entities(text);
    decoded.into_owned()
}
/// Decodes HTML entities in `text`, borrowing the input when possible.
///
/// Text without any `&` is returned borrowed straight away, without calling the
/// decoder. Everything else is passed to `html_escape::decode_html_entities`.
#[must_use]
#[inline]
pub fn decode_html_entities_cow(text: &str) -> Cow<'_, str> {
    if text.contains('&') {
        html_escape::decode_html_entities(text)
    } else {
        Cow::Borrowed(text)
    }
}
/// Returns `true` for the non-ASCII space characters handled by whitespace
/// normalisation: U+00A0, U+1680, U+2000 through U+200A, U+202F, U+205F and
/// U+3000.
///
/// ASCII space, tab and line breaks are not part of this set.
#[inline]
const fn is_unicode_space(ch: char) -> bool {
    matches!(
        ch,
        '\u{00A0}' | '\u{1680}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}'
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `escape` with `flags` (misc, asterisks, underscores, ascii) over
    /// every `(input, expected)` pair in `cases`.
    fn assert_escapes(flags: [bool; 4], cases: &[(&str, &str)]) {
        let [misc, asterisks, underscores, ascii] = flags;
        for &(input, expected) in cases {
            assert_eq!(
                escape(input, misc, asterisks, underscores, ascii),
                expected,
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn test_escape_misc() {
        assert_escapes(
            [true, false, false, false],
            &[
                ("foo & bar", r"foo \& bar"),
                ("foo [bar]", r"foo \[bar\]"),
                ("1. Item", r"1\. Item"),
                ("1) Item", r"1\) Item"),
            ],
        );
    }

    #[test]
    fn test_escape_asterisks() {
        assert_escapes(
            [false, true, false, false],
            &[("foo * bar", r"foo \* bar"), ("**bold**", r"\*\*bold\*\*")],
        );
    }

    #[test]
    fn test_escape_underscores() {
        assert_escapes(
            [false, false, true, false],
            &[("foo_bar", r"foo\_bar"), ("__bold__", r"\_\_bold\_\_")],
        );
    }

    #[test]
    fn test_escape_ascii() {
        assert_escapes(
            [false, false, false, true],
            &[
                (r##"!"#$%&"##, r#"\!\"\#\$\%\&"#),
                ("*+,-./", r"\*\+\,\-\.\/"),
                ("<=>?@", r"\<\=\>\?\@"),
                (r"[\]^_`", r"\[\\\]\^\_\`"),
                ("{|}~", r"\{\|\}\~"),
            ],
        );
    }

    #[test]
    fn test_chomp() {
        let cases = [
            (" text ", (" ", " ", "text")),
            ("text", ("", "", "text")),
            (" text", (" ", "", "text")),
            ("text ", ("", " ", "text")),
            ("", ("", "", "")),
        ];
        for (input, expected) in cases {
            assert_eq!(chomp(input), expected, "input: {input:?}");
        }
    }
}