use std::collections::HashSet;
use std::sync::LazyLock;
/// Known abbreviations, built once on first access.
///
/// Every entry is lowercase and stored without its trailing period; periods
/// (and, for a few multi-word entries, spaces) inside an abbreviation are
/// kept, e.g. `"e.g"`, `"ph.d"`, `"et al"`, `"op. cit"`.
pub static ABBREVIATIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Personal titles, honorifics, ranks and offices.
    const TITLES: &[&str] = &[
        "mr", "mrs", "ms", "miss", "dr", "prof", "rev", "fr", "sr", "jr", "messrs", "mmes",
        "msgr", "hon", "esq", "phd", "md", "dds", "capt", "col", "gen", "lt", "maj", "sgt",
        "cpl", "pvt", "adm", "cmdr", "sen", "rep", "gov", "pres", "sec",
    ];
    // Academic and professional degrees written with internal periods.
    const DEGREES: &[&str] = &[
        "b.a", "b.s", "m.a", "m.s", "m.b.a", "ph.d", "m.d", "j.d", "ll.b", "ll.m", "d.d.s",
        "d.v.m", "pharm.d", "ed.d", "psy.d",
    ];
    // Latin and scholarly shorthand, including the multi-word entries.
    const SCHOLARLY: &[&str] = &[
        "etc", "vs", "e.g", "i.e", "et al", "cf", "viz", "ibid", "op. cit", "loc. cit", "n.b",
        "p.s", "r.s.v.p",
    ];
    // Times of day, eras, months and weekdays.
    const DATES_AND_TIMES: &[&str] = &[
        "a.m", "p.m", "b.c", "a.d", "c.e", "b.c.e", "jan", "feb", "mar", "apr", "jun", "jul",
        "aug", "sep", "sept", "oct", "nov", "dec", "mon", "tue", "tues", "wed", "thu", "thur",
        "thurs", "fri", "sat", "sun",
    ];
    // Street/address parts and place names.
    const PLACES: &[&str] = &[
        "st", "ave", "blvd", "rd", "ct", "ln", "pl", "ter", "apt", "ste", "rm", "fl", "bldg",
        "dept", "u.s", "u.k", "u.s.a", "e.u", "n.y", "calif", "fla", "mass", "penn", "wash",
    ];
    // Company and organisation suffixes.
    const BUSINESS: &[&str] = &[
        "inc", "corp", "ltd", "llc", "co", "bros", "assn", "div", "mfg", "dist", "intl",
    ];
    // Units of measure.
    const UNITS: &[&str] = &[
        "oz", "lb", "lbs", "kg", "g", "mg", "l", "ml", "cm", "mm", "m", "km", "in", "ft", "yd",
        "mi", "sq", "cu", "mph", "kph", "rpm", "hp",
    ];
    // Document references and numeric qualifiers. "sec" also appears in
    // TITLES; the set keeps a single copy.
    const REFERENCES: &[&str] = &[
        "vol", "no", "nos", "p", "pp", "par", "sec", "ch", "fig", "eq", "est", "approx", "min",
        "max", "avg",
    ];
    // Everything else.
    const MISCELLANEOUS: &[&str] = &[
        "misc", "nr", "ref", "refs", "ed", "eds", "trans", "supp", "app", "encl",
    ];

    let categories = [
        TITLES,
        DEGREES,
        SCHOLARLY,
        DATES_AND_TIMES,
        PLACES,
        BUSINESS,
        UNITS,
        REFERENCES,
        MISCELLANEOUS,
    ];
    categories
        .iter()
        .flat_map(|category| category.iter().copied())
        .collect::<HashSet<_>>()
});
/// Reports whether `word` is a known abbreviation.
///
/// The lookup against [`ABBREVIATIONS`] is case-insensitive and ignores any
/// periods at the very start or end of `word`. Periods inside the word are
/// significant, so `"i.e"` and `"I.E."` match while `"ie"` does not.
pub fn is_abbreviation(word: &str) -> bool {
    let normalized = word.to_lowercase();
    // Stored entries carry no leading/trailing period, so strip them here.
    ABBREVIATIONS.contains(normalized.trim_matches('.'))
}
pub fn ends_with_abbreviation(text: &str) -> bool {
if !text.ends_with('.') {
return false;
}
let without_period = text.trim_end_matches('.');
let word = without_period.split_whitespace().next_back().unwrap_or("");
is_abbreviation(word)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-known abbreviations are recognised regardless of letter case.
    #[test]
    fn common_abbreviations() {
        for word in ["dr", "Dr", "mr", "etc", "i.e", "phd"] {
            assert!(is_abbreviation(word), "expected an abbreviation: {:?}", word);
        }
    }

    /// Ordinary words must not be reported as abbreviations.
    #[test]
    fn not_abbreviations() {
        for word in ["hello", "world", "test"] {
            assert!(!is_abbreviation(word), "unexpected abbreviation: {:?}", word);
        }
    }

    /// Only text whose final word is an abbreviation counts.
    #[test]
    fn ends_with_abbreviation_works() {
        let cases = [
            ("This is Dr.", true),
            ("See page 5 etc.", true),
            ("This is a sentence.", false),
        ];
        for (text, expected) in cases {
            assert!(
                ends_with_abbreviation(text) == expected,
                "wrong result for {:?}",
                text
            );
        }
    }
}