/// Punctuation characters stripped from span edges by `clean_span_tail` and
/// `clean_span_head`.
///
/// The set holds sentence punctuation plus the closing brackets `)` and `]`;
/// opening brackets are not included.
const SPAN_PUNCT: &[char] = &['.', ',', ';', ':', '!', '?', ')', ']'];
/// Strips trailing punctuation and possessive suffixes from a span's text.
///
/// Characters listed in `SPAN_PUNCT` and the possessive endings `'s` / `’s`
/// (ASCII apostrophe or U+2019) are removed from the end of `text`, and the
/// removal is repeated until none of them remains. Because of that, the
/// result does not depend on the order in which the suffixes are stacked:
/// `Inc.'s` cleans to `Inc`, and a mix of ASCII and curly possessives is
/// removed regardless of which one is outermost.
///
/// Returns the cleaned prefix of `text` together with the number of
/// characters (Unicode scalar values, not bytes) that were removed.
pub fn clean_span_tail(text: &str) -> (&str, usize) {
    let mut cleaned = text;
    loop {
        let len_before = cleaned.len();
        cleaned = cleaned
            .trim_end_matches(SPAN_PUNCT)
            .trim_end_matches("'s")
            .trim_end_matches("\u{2019}s");
        // A pass that removed nothing means no strippable suffix is left;
        // otherwise the string got strictly shorter, so the loop terminates.
        if cleaned.len() == len_before {
            break;
        }
    }
    // `cleaned` is always a prefix of `text` (only the end is trimmed), so
    // `cleaned.len()` is a valid char boundary and the slice cannot panic.
    // Count chars rather than bytes so `’` (3 bytes) counts as one.
    let trimmed_chars = text[cleaned.len()..].chars().count();
    (cleaned, trimmed_chars)
}
/// Strips leading punctuation from a span's text.
///
/// Every leading character that appears in `SPAN_PUNCT` is removed.
///
/// Returns the remaining suffix of `text` together with the number of
/// characters (Unicode scalar values, not bytes) that were removed.
pub fn clean_span_head(text: &str) -> (&str, usize) {
    let is_punct = |c: char| SPAN_PUNCT.contains(&c);
    // The run of leading punctuation is exactly what `trim_start_matches`
    // drops below, so counting it up front gives the trimmed length.
    let skipped = text.chars().take_while(|&c| is_punct(c)).count();
    let rest = text.trim_start_matches(is_punct);
    (rest, skipped)
}
/// Cleans both edges of a span's text.
///
/// The head is cleaned first with `clean_span_head`, then the remainder is
/// passed to `clean_span_tail`.
///
/// Returns `(cleaned, head_trimmed, tail_trimmed)`, where the two counts are
/// the character counts reported by the head and tail cleaners respectively.
pub fn clean_span_boundary(text: &str) -> (&str, usize, usize) {
    let (without_head, head_trimmed) = clean_span_head(text);
    let (core, tail_trimmed) = clean_span_tail(without_head);
    (core, head_trimmed, tail_trimmed)
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- clean_span_tail -------------------------------------------------

    #[test]
    fn tail_trailing_period() {
        let (cleaned, trimmed) = clean_span_tail("Seattle.");
        assert_eq!(cleaned, "Seattle");
        assert_eq!(trimmed, 1);
    }

    #[test]
    fn tail_multiple_punct() {
        let (cleaned, trimmed) = clean_span_tail("wow!?");
        assert_eq!(cleaned, "wow");
        assert_eq!(trimmed, 2);
    }

    #[test]
    fn tail_brackets() {
        let (cleaned, trimmed) = clean_span_tail("Inc.)");
        assert_eq!(cleaned, "Inc");
        assert_eq!(trimmed, 2);

        let (cleaned, trimmed) = clean_span_tail("array]");
        assert_eq!(cleaned, "array");
        assert_eq!(trimmed, 1);
    }

    #[test]
    fn tail_possessive_ascii() {
        let (cleaned, trimmed) = clean_span_tail("Obama's");
        assert_eq!(cleaned, "Obama");
        assert_eq!(trimmed, 2);
    }

    #[test]
    fn tail_possessive_curly() {
        // The curly apostrophe is multi-byte; the count is in chars.
        let (cleaned, trimmed) = clean_span_tail("Obama\u{2019}s");
        assert_eq!(cleaned, "Obama");
        assert_eq!(trimmed, 2);
    }

    #[test]
    fn tail_possessive_plus_punct() {
        let (cleaned, trimmed) = clean_span_tail("Elon Musk's.");
        assert_eq!(cleaned, "Elon Musk");
        assert_eq!(trimmed, 3);
    }

    #[test]
    fn tail_no_op() {
        let (cleaned, trimmed) = clean_span_tail("hello");
        assert_eq!(cleaned, "hello");
        assert_eq!(trimmed, 0);
    }

    #[test]
    fn tail_empty() {
        let (cleaned, trimmed) = clean_span_tail("");
        assert_eq!(cleaned, "");
        assert_eq!(trimmed, 0);
    }

    #[test]
    fn tail_all_punct() {
        let (cleaned, trimmed) = clean_span_tail("...");
        assert_eq!(cleaned, "");
        assert_eq!(trimmed, 3);
    }

    #[test]
    fn tail_unicode_text_with_punct() {
        let (cleaned, trimmed) = clean_span_tail("東京.");
        assert_eq!(cleaned, "東京");
        assert_eq!(trimmed, 1);
    }

    // --- clean_span_head -------------------------------------------------

    #[test]
    fn head_leading_period() {
        let (cleaned, trimmed) = clean_span_head(".Seattle");
        assert_eq!(cleaned, "Seattle");
        assert_eq!(trimmed, 1);
    }

    #[test]
    fn head_multiple() {
        let (cleaned, trimmed) = clean_span_head(".,;Seattle");
        assert_eq!(cleaned, "Seattle");
        assert_eq!(trimmed, 3);
    }

    #[test]
    fn head_no_op() {
        let (cleaned, trimmed) = clean_span_head("hello");
        assert_eq!(cleaned, "hello");
        assert_eq!(trimmed, 0);
    }

    #[test]
    fn head_empty() {
        let (cleaned, trimmed) = clean_span_head("");
        assert_eq!(cleaned, "");
        assert_eq!(trimmed, 0);
    }

    // --- clean_span_boundary ---------------------------------------------

    #[test]
    fn boundary_both_ends() {
        assert_eq!(clean_span_boundary(".,Seattle!?"), ("Seattle", 2, 2));
    }

    #[test]
    fn boundary_only_trailing() {
        assert_eq!(clean_span_boundary("Seattle."), ("Seattle", 0, 1));
    }

    #[test]
    fn boundary_only_leading() {
        assert_eq!(clean_span_boundary(".Seattle"), ("Seattle", 1, 0));
    }

    #[test]
    fn boundary_clean() {
        assert_eq!(clean_span_boundary("Seattle"), ("Seattle", 0, 0));
    }

    #[test]
    fn boundary_empty() {
        assert_eq!(clean_span_boundary(""), ("", 0, 0));
    }
}