/// Coarse category assigned to a word.
///
/// The declaration order is significant: the derived `PartialOrd`/`Ord`
/// implementations compare variants by their position in this list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Kind {
    /// Presumably a word found in a lexicon; the `From<&str>` classifier in
    /// this file never yields it, so it must be assigned elsewhere.
    Lexicon,
    /// Contains an alphabetic character outside ASCII.
    Foreign,
    /// Digits followed by an ordinal ending, e.g. `21st` or `4th`.
    Ordinal,
    /// Built only from Roman-numeral letters of a single case.
    Roman,
    /// Contains at least one ASCII digit.
    Number,
    /// Uppercase letters, optionally with periods, e.g. `NASA` or `U.S.`.
    Acronym,
    /// Starts with an uppercase letter and has a lowercase letter after it.
    Proper,
    /// A single character that matched none of the categories above.
    Symbol,
    /// Nothing matched.
    Unknown,
}
impl Kind {
pub fn all() -> &'static [Self] {
use Kind::*;
&[
Lexicon, Foreign, Ordinal, Roman, Number, Acronym, Proper, Symbol,
Unknown,
]
}
pub fn code(self) -> char {
use Kind::*;
match self {
Lexicon => 'l',
Foreign => 'f',
Ordinal => 'o',
Roman => 'r',
Number => 'n',
Acronym => 'a',
Proper => 'p',
Symbol => 's',
Unknown => 'u',
}
}
}
impl From<&str> for Kind {
    /// Classifies `word` by shape alone.
    ///
    /// The rules are tried top to bottom and the first match wins, so the
    /// order matters: for example `IV` is reported as `Roman` before the
    /// acronym rule is ever consulted. `Kind::Lexicon` is never produced
    /// here, and the empty string falls through to `Kind::Unknown`.
    fn from(word: &str) -> Self {
        match word {
            w if is_foreign(w) => Kind::Foreign,
            w if is_ordinal_number(w) => Kind::Ordinal,
            w if is_roman_numeral(w) => Kind::Roman,
            w if is_number(w) => Kind::Number,
            w if is_acronym(w) => Kind::Acronym,
            w if is_probably_proper(w) => Kind::Proper,
            // Whatever single character is left over is treated as a symbol.
            w if w.chars().count() == 1 => Kind::Symbol,
            _ => Kind::Unknown,
        }
    }
}
/// Returns `true` when `word` contains at least one alphabetic character
/// that is not ASCII (e.g. an accented letter).
///
/// Non-ASCII characters that are not alphabetic, such as currency signs or
/// dashes, do not count.
fn is_foreign(word: &str) -> bool {
    word.chars()
        .filter(|c| !c.is_ascii())
        .any(char::is_alphabetic)
}
/// Endings recognised by [`is_ordinal_number`], in all-lower and all-upper
/// case. The `st`/`nd`/`rd` endings carry their leading digit so that only
/// `…1st`, `…2nd` and `…3rd` are accepted for them.
const ORD_SUFFIXES: &[&str] = &["1st", "1ST", "2nd", "2ND", "3rd", "3RD", "th", "TH"];

/// Returns `true` when `w` looks like an ordinal written with digits, such
/// as `1st`, `22ND` or `4th`.
///
/// `w` must be at least three characters long, end with one of
/// [`ORD_SUFFIXES`], and have nothing but ASCII digits before that ending.
/// No check is made that the ending agrees with the number, so `1th` is
/// accepted too.
fn is_ordinal_number(w: &str) -> bool {
    if w.chars().count() < 3 {
        return false;
    }
    ORD_SUFFIXES
        .iter()
        // Only the first ending that matches is considered.
        .find_map(|suffix| w.strip_suffix(*suffix))
        .map_or(false, |digits| digits.chars().all(|c| c.is_ascii_digit()))
}
/// Uppercase letters that may appear in a Roman numeral.
const ROMAN_UPPER: &str = "IVXLCDM";
/// Lowercase letters that may appear in a Roman numeral.
const ROMAN_LOWER: &str = "ivxlcdm";

/// Returns `true` when `word` is non-empty and consists solely of
/// Roman-numeral letters, either all uppercase or all lowercase.
///
/// Only the alphabet is checked, not whether the letters form a valid
/// numeral, so ordinary words such as `mix` pass as well.
fn is_roman_numeral(word: &str) -> bool {
    if word.is_empty() {
        return false;
    }
    let drawn_from = |alphabet: &str| word.chars().all(|c| alphabet.contains(c));
    drawn_from(ROMAN_UPPER) || drawn_from(ROMAN_LOWER)
}
/// Returns `true` when `word` contains at least one ASCII digit (`0`-`9`)
/// anywhere in it. Digits from other scripts are not counted.
fn is_number(word: &str) -> bool {
    word.contains(|c: char| c.is_ascii_digit())
}
/// Returns `true` when `word` looks like an acronym such as `NASA`,
/// `U.S.A.` or `A.`.
///
/// A word qualifies when it:
/// * is at least two characters long,
/// * consists only of uppercase letters and periods, and
/// * contains at least one uppercase letter.
///
/// The last condition keeps punctuation-only strings such as `..` or `...`
/// from being reported as acronyms.
fn is_acronym(word: &str) -> bool {
    word.chars().count() >= 2
        && word.chars().any(char::is_uppercase)
        && word.chars().all(|c| c.is_uppercase() || c == '.')
}
/// Heuristic for proper nouns: the first character is uppercase and at
/// least one later character is lowercase.
///
/// All-caps words, single letters and the empty string therefore return
/// `false`.
fn is_probably_proper(word: &str) -> bool {
    let mut rest = word.chars();
    let starts_upper = rest.next().map_or(false, char::is_uppercase);
    // `rest` now holds everything after the first character.
    starts_upper && rest.any(char::is_lowercase)
}