use regex::Regex;
use std::sync::LazyLock;
/// Lazily compiled pattern that locates a three-dot ellipsis together with its
/// immediate surroundings. The `(?m)` flag puts `^` in multi-line mode.
///
/// Capture groups:
/// 1. the prefix: a line start (empty), a `\w` character, or a quote
///    (`"`, `'`, U+2018, U+2019, U+201C, U+201D);
/// 2. any whitespace between the prefix and the dots;
/// 3. the three literal dots;
/// 4. at most one trailing punctuation character out of
///    `.` `,` `;` `:` `?` `!` `)` `-` U+2014 and the same quotes as group 1;
/// 5. any whitespace following the dots / punctuation.
///
/// # Panics
///
/// The first access panics if the pattern fails to compile.
static ELLIPSIS_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let compiled = Regex::new(
        r"(?m)(^|[\w\x22\x27\u{2018}\u{2019}\u{201c}\u{201d}])(\s*)(\.\.\.)([\.,;:\?!\)\-\u{2014}\x22\x27\u{2018}\u{2019}\u{201c}\u{201d}]?)(\s*)",
    );
    compiled.expect("valid ELLIPSIS_PATTERN regex")
});
/// Reports whether `c` counts as a "word" character: an underscore, or any
/// character for which [`char::is_alphanumeric`] returns `true`.
///
/// NOTE(review): this may be narrower than the regex `\w` class for some code
/// points (e.g. combining marks) — confirm whether that difference matters.
fn is_word_char(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Converts three-dot ellipses (`...`) in `text` into the single ellipsis
/// character U+2026 and normalises the spacing around it.
///
/// For every match of `ELLIPSIS_PATTERN`:
///
/// * If the character right after the match exists and is not a word
///   character (see `is_word_char`), the matched text is copied through
///   unchanged.
/// * Otherwise the three dots are replaced by U+2026. A single space is
///   inserted before it when it directly follows a word character with no
///   whitespace in between; existing whitespace is kept as-is.
/// * One optional trailing punctuation character is kept directly after the
///   ellipsis.
/// * A single space is inserted after the ellipsis when a word character
///   follows immediately (no whitespace, no punctuation); existing whitespace
///   is kept as-is.
///
/// Text outside the matches is copied verbatim.
///
/// For example, `Hello... world` becomes `Hello … world` and `Hello...!`
/// becomes `Hello …!`.
pub fn ellipses(text: &str) -> String {
    // The output is normally about as long as the input, so reserve up front.
    let mut result = String::with_capacity(text.len());
    let mut last_end = 0;

    // `captures_iter` yields the capture groups for each non-overlapping match
    // in a single pass, so the regex never has to be re-run on a sliced suffix.
    for caps in ELLIPSIS_PATTERN.captures_iter(text) {
        // Group 0 is the overall match and is present for every yielded item.
        let Some(whole) = caps.get(0) else { continue };
        let prefix = caps.get(1).map_or("", |g| g.as_str());
        let spaces_before = caps.get(2).map_or("", |g| g.as_str());
        let punct = caps.get(4).map_or("", |g| g.as_str());
        let spaces_after = caps.get(5).map_or("", |g| g.as_str());

        let next_char = text[whole.end()..].chars().next();

        // Only rewrite when the match runs up to the end of the text or is
        // followed by a word character; anything else is passed through.
        if next_char.is_some_and(|nc| !is_word_char(nc)) {
            result.push_str(&text[last_end..whole.end()]);
            last_end = whole.end();
            continue;
        }

        result.push_str(&text[last_end..whole.start()]);
        result.push_str(prefix);

        // `starts_with` is already false for an empty prefix (line start).
        if prefix.starts_with(is_word_char) && spaces_before.is_empty() {
            result.push(' ');
        } else {
            result.push_str(spaces_before);
        }

        result.push('\u{2026}');
        result.push_str(punct);

        // Past the guard above, `next_char` is either `None` or a word char,
        // so `is_some()` is equivalent to "a word character follows".
        if next_char.is_some() && spaces_after.is_empty() && punct.is_empty() {
            result.push(' ');
        } else {
            result.push_str(spaces_after);
        }

        last_end = whole.end();
    }

    result.push_str(&text[last_end..]);
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Dots glued to the preceding word gain a space before the ellipsis;
    /// the whitespace that already follows is kept.
    #[test]
    fn test_basic_ellipsis() {
        let rendered = ellipses("Hello... world");
        assert_eq!(rendered, "Hello \u{2026} world");
    }

    /// Whitespace already surrounding the dots is preserved on both sides.
    #[test]
    fn test_ellipsis_with_space() {
        let rendered = ellipses("Hello ... world");
        assert_eq!(rendered, "Hello \u{2026} world");
    }

    /// An ellipsis that ends the input is still converted.
    #[test]
    fn test_ellipsis_at_end() {
        let rendered = ellipses("Hello...");
        assert_eq!(rendered, "Hello \u{2026}");
    }

    /// Trailing punctuation stays attached directly after the ellipsis.
    #[test]
    fn test_ellipsis_with_punct() {
        let rendered = ellipses("Hello...!");
        assert_eq!(rendered, "Hello \u{2026}!");
    }
}