facet_singularize/
lib.rs

1//! Fast, no-regex English singularization.
2//!
3//! This crate provides functions to convert plural English words to their singular form,
4//! without using regex. It's designed for use in deserialization where performance matters.
5//!
6//! # Example
7//!
8//! ```
9//! use facet_singularize::singularize;
10//!
11//! assert_eq!(singularize("dependencies"), "dependency");
12//! assert_eq!(singularize("items"), "item");
13//! assert_eq!(singularize("children"), "child");
14//! assert_eq!(singularize("boxes"), "box");
15//! ```
16//!
17//! # Performance
18//!
19//! This crate uses simple string operations (suffix matching, table lookups) instead of
20//! regex, making it suitable for hot paths like deserialization.
21
22#![no_std]
23#![warn(missing_docs)]
24
25#[cfg(feature = "alloc")]
26extern crate alloc;
27
28#[cfg(feature = "alloc")]
29use alloc::string::String;
30
/// Irregular plural → singular mappings, stored as `(plural, singular)` pairs.
///
/// These are English nouns (including Latin/Greek loanwords common in technical
/// vocabularies) whose singular cannot be derived from the plural by suffix
/// rules, or would be derived incorrectly (e.g. "syntheses" would lose its
/// `-es` and become "synthes").
///
/// A few plurals are ambiguous in English ("axes", "bases", "ellipses"); the
/// table records a single singular for each.
///
/// # Invariant
///
/// Entries MUST stay sorted by plural in ascending byte order, without
/// duplicates: lookups use binary search on the plural, and an out-of-order
/// entry silently becomes unreachable.
static IRREGULARS: &[(&str, &str)] = &[
    ("alumni", "alumnus"),
    ("analyses", "analysis"),
    ("antennae", "antenna"),
    ("appendices", "appendix"),
    ("automata", "automaton"),
    ("axes", "axis"),
    ("bacteria", "bacterium"),
    ("bases", "basis"),
    ("cacti", "cactus"),
    ("children", "child"),
    ("crises", "crisis"),
    ("criteria", "criterion"),
    ("curricula", "curriculum"),
    ("data", "datum"),
    ("diagnoses", "diagnosis"),
    ("dice", "die"),
    ("ellipses", "ellipsis"),
    ("emphases", "emphasis"),
    ("feet", "foot"),
    ("foci", "focus"),
    ("formulae", "formula"),
    ("fungi", "fungus"),
    ("geese", "goose"),
    ("genera", "genus"),
    ("helices", "helix"),
    ("hypotheses", "hypothesis"),
    ("indices", "index"),
    ("larvae", "larva"),
    ("lice", "louse"),
    ("matrices", "matrix"),
    ("maxima", "maximum"),
    ("media", "medium"),
    ("memoranda", "memorandum"),
    ("men", "man"),
    ("mice", "mouse"),
    ("minima", "minimum"),
    ("nebulae", "nebula"),
    ("nuclei", "nucleus"),
    ("oases", "oasis"),
    ("octopi", "octopus"),
    ("oxen", "ox"),
    ("parentheses", "parenthesis"),
    ("people", "person"),
    ("phenomena", "phenomenon"),
    ("prognoses", "prognosis"),
    ("radii", "radius"),
    ("spectra", "spectrum"),
    ("stimuli", "stimulus"),
    ("strata", "stratum"),
    ("syllabi", "syllabus"),
    ("synopses", "synopsis"),
    ("syntheses", "synthesis"),
    ("teeth", "tooth"),
    ("theses", "thesis"),
    ("vertebrae", "vertebra"),
    ("vertices", "vertex"),
    ("vortices", "vortex"),
    ("women", "woman"),
];
82
/// Nouns whose singular and plural spellings are identical.
///
/// A word found here is never rewritten by the suffix rules, which keeps
/// entries such as "news", "series" and "species" from losing their final `s`.
///
/// # Invariant
///
/// Keep the list in ascending byte order without duplicates: membership is
/// tested with binary search, so a misplaced entry would never be found.
static UNCOUNTABLE: &[&str] = &[
    "aircraft",
    "bison",
    "buffalo",
    "deer",
    "equipment",
    "fish",
    "furniture",
    "information",
    "machinery",
    "moose",
    "news",
    "rice",
    "salmon",
    "series",
    "sheep",
    "shrimp",
    "software",
    "species",
    "swine",
    "trout",
    "tuna",
];
107
/// Return the singular form of a plural English word.
///
/// The lookup runs in three stages and the first stage that applies wins:
///
/// 1. **Irregular plurals** are looked up in a table
///    (children → child, people → person, …).
/// 2. **Uncountable nouns** (sheep, fish, …) are returned unchanged.
/// 3. **Suffix rules** rewrite regular plurals:
///    - `-ies` → `-y` (dependencies → dependency)
///    - `-ves` → `-f` or `-fe` (wolves → wolf, knives → knife)
///    - `-es` is dropped after s, x, z, ch, sh (boxes → box)
///    - a trailing `-s` is dropped (items → item)
///
/// A word that none of the stages recognizes is returned as-is. Matching is
/// exact, so the tables only recognize the lowercase spellings they contain.
///
/// # Examples
///
/// ```
/// use facet_singularize::singularize;
///
/// // Irregular
/// assert_eq!(singularize("children"), "child");
/// assert_eq!(singularize("people"), "person");
/// assert_eq!(singularize("mice"), "mouse");
///
/// // Standard rules
/// assert_eq!(singularize("dependencies"), "dependency");
/// assert_eq!(singularize("boxes"), "box");
/// assert_eq!(singularize("items"), "item");
/// assert_eq!(singularize("wolves"), "wolf");
///
/// // Uncountable (unchanged)
/// assert_eq!(singularize("sheep"), "sheep");
/// assert_eq!(singularize("fish"), "fish");
/// ```
#[cfg(feature = "alloc")]
pub fn singularize(word: &str) -> String {
    // Both tables are sorted, so each membership test is a binary search.
    match IRREGULARS.binary_search_by(|(plural, _)| plural.cmp(&word)) {
        Ok(hit) => String::from(IRREGULARS[hit].1),
        // Uncountable nouns must bypass the suffix rules ("news" is not "new").
        Err(_) if UNCOUNTABLE.binary_search(&word).is_ok() => String::from(word),
        // Regular plural, or nothing we recognize: fall back to the input.
        Err(_) => try_singularize_suffix(word).unwrap_or_else(|| String::from(word)),
    }
}
159
160/// Check if a singular word could be the singular form of a plural word.
161///
162/// This is useful for matching node names to field names in deserialization:
163/// - `is_singular_of("dependency", "dependencies")` → `true`
164/// - `is_singular_of("child", "children")` → `true`
165/// - `is_singular_of("item", "items")` → `true`
166///
167/// This function is allocation-free when possible.
168pub fn is_singular_of(singular: &str, plural: &str) -> bool {
169    // Exact match (for uncountable or same word)
170    if singular == plural {
171        return true;
172    }
173
174    // Check irregulars - search by plural, compare singular
175    if let Ok(idx) = IRREGULARS.binary_search_by_key(&plural, |&(p, _)| p) {
176        return IRREGULARS[idx].1 == singular;
177    }
178
179    // Check uncountable
180    if UNCOUNTABLE.binary_search(&plural).is_ok() {
181        return singular == plural;
182    }
183
184    // Check suffix rules without allocation
185    is_singular_of_by_suffix(singular, plural)
186}
187
188/// Try to singularize using suffix rules, returning None if no rule matches.
189#[cfg(feature = "alloc")]
fn try_singularize_suffix(word: &str) -> Option<String> {
    // Derives a singular from a regular plural by rewriting its suffix.
    //
    // Rules run from most to least specific (`-ies`, `-ves`, `-es`, `-s`) and
    // the first one that applies wins. `None` means no rule applied: the word
    // is shorter than two bytes, does not end in `s`, or ends in `ss`.
    //
    // These are heuristics. English has pairs that no suffix rule can tell
    // apart (buses → bus vs. cases → case); `-es` is dropped after a single
    // `s`, so the latter comes out as "cas".
    //
    // Every cut goes through `strip_suffix` with an ASCII suffix, so slicing
    // always lands on a character boundary.

    // Stems (the text before `-ies`) whose singular ends in `-ie`, not `-y`:
    // calorie, cookie, movie, rookie, selfie, zombie.
    const IE_STEMS: [&str; 6] = ["calor", "cook", "mov", "rook", "self", "zomb"];

    // Stem endings (the text before `-ves`) whose singular ends in `-f`:
    // half, scarf, leaf, shelf, thief, loaf, wolf, hoof.
    const F_STEM_ENDINGS: [&str; 8] = ["al", "ar", "ea", "el", "ie", "oa", "ol", "oo"];

    // Concatenates with one exact-size allocation instead of going through
    // the formatting machinery.
    fn join(stem: &str, ending: &str) -> String {
        let mut out = String::with_capacity(stem.len() + ending.len());
        out.push_str(stem);
        out.push_str(ending);
        out
    }

    // Need at least 2 characters
    if word.len() < 2 {
        return None;
    }

    // -ies → -y
    if let Some(stem) = word.strip_suffix("ies") {
        // One-letter stems are `-ie` words (ties, pies, dies): let the plain
        // `-s` rule at the bottom handle them.
        let short_stem = stem.chars().nth(1).is_none();
        let ie_noun = IE_STEMS.iter().any(|&s| stem.ends_with(s));
        // A vowel before `-ies` is not a `-y` plural; leave it to later rules.
        let vowel_before = matches!(stem.chars().next_back(), Some('a' | 'e' | 'o' | 'u'));
        if !(short_stem || ie_noun || vowel_before) {
            return Some(join(stem, "y"));
        }
    }

    // -ves → -fe, -f or -ve
    if let Some(stem) = word.strip_suffix("ves") {
        if !stem.is_empty() {
            // knives → knife, wives → wife, lives → life
            if matches!(stem, "kni" | "wi" | "li") {
                return Some(join(stem, "fe"));
            }
            // leaves → leaf, wolves → wolf, shelves → shelf, thieves → thief, …
            if F_STEM_ENDINGS.iter().any(|&ending| stem.ends_with(ending)) {
                return Some(join(stem, "f"));
            }
            // Everything else is overwhelmingly a `-ve` noun:
            // gloves → glove, archives → archive, directives → directive.
            return Some(join(stem, "ve"));
        }
    }

    // -es → remove after sibilants (s, x, z, ch, sh) and after `o`
    if let Some(stem) = word.strip_suffix("es") {
        // "quiz" doubles its `z` in the plural; other `-zz` stems (buzz, fizz)
        // already end in a double `z` and keep it.
        if stem.ends_with("quizz") {
            return stem.strip_suffix('z').map(String::from);
        }
        let drops_es = ["s", "x", "z", "ch", "sh", "o"]
            .iter()
            .any(|&ending| stem.ends_with(ending));
        if drops_es {
            return Some(String::from(stem));
        }
    }

    // -s → remove (most common case, checked last); `-ss` words such as
    // "class" are already singular.
    word.strip_suffix('s')
        .filter(|stem| !stem.ends_with('s'))
        .map(String::from)
}
277
/// Check whether `singular` and `plural` are related by one of the regular
/// suffix rules, without allocation.
///
/// Returns `true` when the two words share a stem and differ only by one of
/// these endings (plural → singular):
///
/// - `-ies` → `-y` (dependencies → dependency)
/// - `-ves` → `-f` or `-fe` (wolves → wolf, knives → knife)
/// - `-zzes` → `-z` (quizzes → quiz)
/// - `-es` → nothing, when the singular ends in s, x, z, ch, sh or o
///   (boxes → box, heroes → hero)
/// - `-s` → nothing, unless the plural ends in `-ss` (items → item)
///
/// Identical words also match. An empty `singular` matches only an empty
/// `plural`, so `""` is never reported as the singular of `"s"`.
fn is_singular_of_by_suffix(singular: &str, plural: &str) -> bool {
    // Without this guard the `-s` rule would strip "s" down to "".
    if singular.is_empty() {
        return plural.is_empty();
    }

    // True when cutting `plural_end` off the plural and `singular_end` off the
    // singular leaves the same stem. Suffixes are ASCII, so the cuts always
    // fall on character boundaries.
    let swaps = |plural_end: &str, singular_end: &str| {
        match (
            plural.strip_suffix(plural_end),
            singular.strip_suffix(singular_end),
        ) {
            (Some(plural_stem), Some(singular_stem)) => plural_stem == singular_stem,
            _ => false,
        }
    };

    if swaps("ies", "y") || swaps("ves", "f") || swaps("ves", "fe") || swaps("zzes", "z") {
        return true;
    }

    // -es → remove (for sibilants and -o)
    if plural.strip_suffix("es") == Some(singular) {
        let drops_es = ["s", "x", "z", "ch", "sh", "o"]
            .iter()
            .any(|&ending| singular.ends_with(ending));
        if drops_es {
            return true;
        }
    }

    // -s → remove; a plural in `-ss` ("class") is not "clas" + s.
    if plural.strip_suffix('s') == Some(singular) && !singular.ends_with('s') {
        return true;
    }

    // Exact match (uncountable that wasn't in our list)
    singular == plural
}
317
#[cfg(test)]
mod tests {
    use super::*;

    // `singularize` only exists when the `alloc` feature is enabled, so every
    // test that calls it carries the same gate. Without the gate the whole
    // test module fails to compile when the feature is off, which would also
    // take the allocation-free `is_singular_of` test down with it.

    /// Plurals resolved through the irregular table.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_irregulars() {
        assert_eq!(singularize("children"), "child");
        assert_eq!(singularize("people"), "person");
        assert_eq!(singularize("mice"), "mouse");
        assert_eq!(singularize("feet"), "foot");
        assert_eq!(singularize("teeth"), "tooth");
        assert_eq!(singularize("geese"), "goose");
        assert_eq!(singularize("men"), "man");
        assert_eq!(singularize("women"), "woman");
        assert_eq!(singularize("oxen"), "ox");
        assert_eq!(singularize("dice"), "die");
        assert_eq!(singularize("indices"), "index");
        assert_eq!(singularize("vertices"), "vertex");
        assert_eq!(singularize("matrices"), "matrix");
        assert_eq!(singularize("criteria"), "criterion");
        assert_eq!(singularize("phenomena"), "phenomenon");
        assert_eq!(singularize("data"), "datum");
        assert_eq!(singularize("media"), "medium");
    }

    /// Uncountable nouns come back unchanged.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_uncountable() {
        assert_eq!(singularize("sheep"), "sheep");
        assert_eq!(singularize("fish"), "fish");
        assert_eq!(singularize("deer"), "deer");
        assert_eq!(singularize("moose"), "moose");
        assert_eq!(singularize("series"), "series");
        assert_eq!(singularize("species"), "species");
        assert_eq!(singularize("news"), "news");
        assert_eq!(singularize("software"), "software");
    }

    /// `-ies` → `-y`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ies_to_y() {
        assert_eq!(singularize("dependencies"), "dependency");
        assert_eq!(singularize("categories"), "category");
        assert_eq!(singularize("stories"), "story");
        assert_eq!(singularize("cities"), "city");
        assert_eq!(singularize("parties"), "party");
        assert_eq!(singularize("queries"), "query");
    }

    /// `-ves` → `-f`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ves_to_f() {
        assert_eq!(singularize("wolves"), "wolf");
        assert_eq!(singularize("halves"), "half");
        assert_eq!(singularize("shelves"), "shelf");
        assert_eq!(singularize("leaves"), "leaf");
        assert_eq!(singularize("calves"), "calf");
    }

    /// `-ves` → `-fe`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ves_to_fe() {
        assert_eq!(singularize("knives"), "knife");
        assert_eq!(singularize("wives"), "wife");
        assert_eq!(singularize("lives"), "life");
    }

    /// `-es` dropped after sibilants.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_es_sibilants() {
        assert_eq!(singularize("boxes"), "box");
        assert_eq!(singularize("matches"), "match");
        assert_eq!(singularize("watches"), "watch");
        assert_eq!(singularize("dishes"), "dish");
        assert_eq!(singularize("bushes"), "bush");
        assert_eq!(singularize("classes"), "class");
        assert_eq!(singularize("buses"), "bus");
        assert_eq!(singularize("quizzes"), "quiz");
    }

    /// `-oes` → `-o`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_oes_to_o() {
        assert_eq!(singularize("heroes"), "hero");
        assert_eq!(singularize("potatoes"), "potato");
        assert_eq!(singularize("tomatoes"), "tomato");
        assert_eq!(singularize("echoes"), "echo");
    }

    /// Plain trailing `-s`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_simple_s() {
        assert_eq!(singularize("items"), "item");
        assert_eq!(singularize("samples"), "sample");
        assert_eq!(singularize("users"), "user");
        assert_eq!(singularize("configs"), "config");
        assert_eq!(singularize("servers"), "server");
        assert_eq!(singularize("handlers"), "handler");
    }

    /// `is_singular_of` does not need `alloc`, so this test always runs.
    #[test]
    fn test_is_singular_of() {
        // Irregulars
        assert!(is_singular_of("child", "children"));
        assert!(is_singular_of("person", "people"));
        assert!(is_singular_of("mouse", "mice"));

        // Standard rules
        assert!(is_singular_of("dependency", "dependencies"));
        assert!(is_singular_of("box", "boxes"));
        assert!(is_singular_of("item", "items"));
        assert!(is_singular_of("wolf", "wolves"));
        assert!(is_singular_of("knife", "knives"));

        // Uncountable
        assert!(is_singular_of("sheep", "sheep"));
        assert!(is_singular_of("fish", "fish"));

        // Non-matches
        assert!(!is_singular_of("cat", "dogs"));
        assert!(!is_singular_of("dependency", "items"));
    }

    /// Words without a plural suffix are returned as-is.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_already_singular() {
        // Words that don't end in common plural suffixes should be returned as-is
        assert_eq!(singularize("config"), "config");
        assert_eq!(singularize("item"), "item");
    }
}