facet_singularize/
lib.rs

1//! Fast, no-regex English singularization.
2//!
3//! This crate provides functions to convert plural English words to their singular form,
4//! without using regex. It's designed for use in deserialization where performance matters.
5//!
6//! # Example
7//!
8//! ```
9//! use facet_singularize::singularize;
10//!
11//! assert_eq!(singularize("dependencies"), "dependency");
12//! assert_eq!(singularize("items"), "item");
13//! assert_eq!(singularize("children"), "child");
14//! assert_eq!(singularize("boxes"), "box");
15//! ```
16//!
17//! # Performance
18//!
19//! This crate uses simple string operations (suffix matching, table lookups) instead of
20//! regex, making it suitable for hot paths like deserialization.
21
22#![no_std]
23#![warn(missing_docs)]
24
25#[cfg(feature = "alloc")]
26extern crate alloc;
27
28#[cfg(feature = "alloc")]
29use alloc::string::String;
30
31mod ie_exceptions;
32
/// Irregular plural → singular mappings.
///
/// These are common English words where the plural form doesn't follow standard rules.
/// Each entry is a `(plural, singular)` pair.
///
/// The list is sorted alphabetically by plural because it is looked up with
/// `binary_search_by_key`; a new entry must be inserted at its sorted position,
/// otherwise lookups are not guaranteed to find it.
static IRREGULARS: &[(&str, &str)] = &[
    ("analyses", "analysis"),
    ("axes", "axis"),
    ("bases", "basis"),
    ("cacti", "cactus"),
    ("children", "child"),
    ("crises", "crisis"),
    ("criteria", "criterion"),
    ("curricula", "curriculum"),
    ("data", "datum"),
    ("diagnoses", "diagnosis"),
    ("dice", "die"),
    ("ellipses", "ellipsis"),
    ("feet", "foot"),
    ("foci", "focus"),
    ("formulae", "formula"),
    ("fungi", "fungus"),
    ("geese", "goose"),
    ("genera", "genus"),
    ("hypotheses", "hypothesis"),
    ("indices", "index"),
    ("larvae", "larva"),
    ("lice", "louse"),
    ("matrices", "matrix"),
    ("media", "medium"),
    ("memoranda", "memorandum"),
    ("men", "man"),
    ("mice", "mouse"),
    ("nebulae", "nebula"),
    ("nuclei", "nucleus"),
    ("oases", "oasis"),
    ("octopi", "octopus"),
    ("oxen", "ox"),
    ("parentheses", "parenthesis"),
    ("people", "person"),
    ("phenomena", "phenomenon"),
    ("radii", "radius"),
    ("stimuli", "stimulus"),
    ("strata", "stratum"),
    ("syllabi", "syllabus"),
    ("synopses", "synopsis"),
    ("teeth", "tooth"),
    ("theses", "thesis"),
    ("vertebrae", "vertebra"),
    ("vertices", "vertex"),
    ("women", "woman"),
];
84
/// Words that are the same in singular and plural form.
///
/// The list is looked up with `binary_search`, so it must stay sorted
/// alphabetically; insert new words at their sorted position.
static UNCOUNTABLE: &[&str] = &[
    "aircraft",
    "bison",
    "buffalo",
    "deer",
    "equipment",
    "fish",
    "furniture",
    "information",
    "machinery",
    "moose",
    "news",
    "rice",
    "salmon",
    "series",
    "sheep",
    "shrimp",
    "software",
    "species",
    "swine",
    "trout",
    "tuna",
];
109
/// Return the singular form of a plural English word.
///
/// The word is resolved in three stages, and the first stage that applies wins:
///
/// 1. Irregular plurals from a built-in table (children → child, people → person, etc.)
/// 2. Uncountable nouns (sheep, fish, etc.), which are returned unchanged
/// 3. Standard suffix rules:
///    - `-ies` → `-y` (dependencies → dependency)
///    - `-ves` → `-f` or `-fe` (wolves → wolf, knives → knife)
///    - `-es` → dropped after s, x, z, ch, sh or o (boxes → box, heroes → hero)
///    - `-s` → dropped (items → item)
///
/// A word that matches none of the above is returned as-is. The tables and suffixes
/// are lowercase and all comparisons are exact, so matching is case-sensitive.
///
/// # Examples
///
/// ```
/// use facet_singularize::singularize;
///
/// // Irregular
/// assert_eq!(singularize("children"), "child");
/// assert_eq!(singularize("people"), "person");
/// assert_eq!(singularize("mice"), "mouse");
///
/// // Standard rules
/// assert_eq!(singularize("dependencies"), "dependency");
/// assert_eq!(singularize("boxes"), "box");
/// assert_eq!(singularize("items"), "item");
/// assert_eq!(singularize("wolves"), "wolf");
///
/// // Uncountable (unchanged)
/// assert_eq!(singularize("sheep"), "sheep");
/// assert_eq!(singularize("fish"), "fish");
/// ```
#[cfg(feature = "alloc")]
pub fn singularize(word: &str) -> String {
    match IRREGULARS.binary_search_by_key(&word, |entry| entry.0) {
        // Irregular plural: the table holds the answer.
        Ok(position) => String::from(IRREGULARS[position].1),
        // Uncountable noun: singular and plural are the same word.
        Err(_) if UNCOUNTABLE.binary_search(&word).is_ok() => String::from(word),
        // Regular word: apply the suffix rules, keeping the input when none matches.
        Err(_) => try_singularize_suffix(word).unwrap_or_else(|| String::from(word)),
    }
}
161
162/// Check if a singular word could be the singular form of a plural word.
163///
164/// This is useful for matching node names to field names in deserialization:
165/// - `is_singular_of("dependency", "dependencies")` → `true`
166/// - `is_singular_of("child", "children")` → `true`
167/// - `is_singular_of("item", "items")` → `true`
168///
169/// This function is allocation-free when possible.
170pub fn is_singular_of(singular: &str, plural: &str) -> bool {
171    // Exact match (for uncountable or same word)
172    if singular == plural {
173        return true;
174    }
175
176    // Check irregulars - search by plural, compare singular
177    if let Ok(idx) = IRREGULARS.binary_search_by_key(&plural, |&(p, _)| p) {
178        return IRREGULARS[idx].1 == singular;
179    }
180
181    // Check uncountable
182    if UNCOUNTABLE.binary_search(&plural).is_ok() {
183        return singular == plural;
184    }
185
186    // Check suffix rules without allocation
187    is_singular_of_by_suffix(singular, plural)
188}
189
/// Try to singularize `word` using suffix rules alone, returning `None` if no rule matches.
///
/// Rules are tried from most to least specific, and the first one that applies wins:
///
/// 1. `-ies` → `-ie` for words in the `ie_exceptions` list (movies → movie), otherwise
///    `-y` (cities → city) unless `a`, `e`, `o` or `u` precedes the suffix, in which
///    case the later rules decide
/// 2. `-ves` → `-fe` (knives → knife), `-f` for the known `-f` nouns (wolves → wolf,
///    leaves → leaf, scarves → scarf), otherwise `-ve` (moves → move, archives → archive)
/// 3. `-es` → dropped after `s`, `x`, `z`, `ch`, `sh` or `o` (boxes → box, heroes → hero);
///    a doubled `z` is reduced to one (quizzes → quiz)
/// 4. `-s` → dropped, unless the word ends in `-ss` (items → item)
///
/// The irregular and uncountable tables are not consulted here; `singularize` checks
/// them before calling this function.
#[cfg(feature = "alloc")]
fn try_singularize_suffix(word: &str) -> Option<String> {
    // Stem endings (the part before "ves") whose singular ends in -f:
    // leaves, loaves, halves, shelves, wolves, scarves.
    const F_STEM_ENDINGS: [&str; 6] = ["ea", "oa", "al", "el", "ol", "ar"];

    // Need at least 2 characters
    if word.len() < 2 {
        return None;
    }

    // Every suffix below is ASCII, so `strip_suffix` always splits on a character
    // boundary. A rule only applies when something is left in front of the suffix.

    // -ies → -ie (exception list) or -y
    if let Some(stem) = word.strip_suffix("ies").filter(|stem| !stem.is_empty()) {
        if ie_exceptions::contains(word) {
            return Some(alloc::format!("{stem}ie"));
        }
        // A vowel before "ies" means the word is not a -y plural; leave it to the
        // generic rules further down.
        if !stem.ends_with(|c: char| matches!(c, 'a' | 'e' | 'o' | 'u')) {
            return Some(alloc::format!("{stem}y"));
        }
    }

    // -ves → -fe, -f or -ve
    if let Some(stem) = word.strip_suffix("ves").filter(|stem| !stem.is_empty()) {
        // knives→knife, wives→wife, lives→life
        if matches!(stem, "kni" | "wi" | "li") {
            return Some(alloc::format!("{stem}fe"));
        }
        // hooves→hoof and thieves→thief are matched whole so that grooves and
        // sieves keep their -ve.
        let singular_ends_in_f = matches!(stem, "hoo" | "thie")
            || F_STEM_ENDINGS.iter().any(|&ending| stem.ends_with(ending));
        if singular_ends_in_f {
            return Some(alloc::format!("{stem}f"));
        }
        // Most other -ves words are plain -ve nouns (moves, gloves, archives,
        // directives), so only the plural "s" is dropped.
        return Some(alloc::format!("{stem}ve"));
    }

    // -es → remove after sibilants (s, x, z, ch, sh) and after o
    if let Some(stem) = word.strip_suffix("es").filter(|stem| !stem.is_empty()) {
        // -zzes → -z (quizzes→quiz). NOTE: this also shortens words whose singular
        // really ends in "zz" (buzzes→buz); that is a known limitation.
        if stem.ends_with("zz") {
            return Some(String::from(&stem[..stem.len() - 1]));
        }

        // Covers -sses→-ss (classes→class), -ses, -xes, -zes, -ches, -shes and
        // -oes (heroes→hero, potatoes→potato).
        let drops_es = stem.ends_with(|c: char| matches!(c, 's' | 'x' | 'z' | 'o'))
            || stem.ends_with("ch")
            || stem.ends_with("sh");
        if drops_es {
            return Some(String::from(stem));
        }
    }

    // -s → remove (most common case, checked last). A word ending in "ss" is left
    // alone, since it is most likely already singular (class, address).
    match word.strip_suffix('s') {
        Some(stem) if !stem.is_empty() && !stem.ends_with('s') => Some(String::from(stem)),
        _ => None,
    }
}
283
284/// Check if singular matches plural by suffix rules, without allocation.
285fn is_singular_of_by_suffix(singular: &str, plural: &str) -> bool {
286    let s_len = singular.len();
287    let p_len = plural.len();
288
289    // -ies → -ie (exception list)
290    if p_len == s_len + 1
291        && plural.ends_with("ies")
292        && singular.ends_with("ie")
293        && ie_exceptions::contains(plural)
294    {
295        return plural[..p_len - 3] == singular[..s_len - 2];
296    }
297
298    // -ies → -y
299    if p_len == s_len + 2 && plural.ends_with("ies") && singular.ends_with('y') {
300        return plural[..p_len - 3] == singular[..s_len - 1];
301    }
302
303    // -ves → -f
304    if p_len == s_len + 2 && plural.ends_with("ves") && singular.ends_with('f') {
305        return plural[..p_len - 3] == singular[..s_len - 1];
306    }
307
308    // -ves → -fe
309    if p_len == s_len + 1 && plural.ends_with("ves") && singular.ends_with("fe") {
310        return plural[..p_len - 3] == singular[..s_len - 2];
311    }
312
313    // -es → remove (for sibilants)
314    if p_len == s_len + 2 && plural.ends_with("es") && &plural[..p_len - 2] == singular {
315        // Check singular ends with sibilant
316        return singular.ends_with('s')
317            || singular.ends_with('x')
318            || singular.ends_with('z')
319            || singular.ends_with("ch")
320            || singular.ends_with("sh")
321            || singular.ends_with('o');
322    }
323
324    // -s → remove
325    if p_len == s_len + 1 && plural.ends_with('s') && !plural.ends_with("ss") {
326        return &plural[..p_len - 1] == singular;
327    }
328
329    // Exact match (uncountable that wasn't in our list)
330    singular == plural
331}
332
/// Unit tests for the public API.
///
/// `singularize` only exists when the `alloc` feature is enabled, so every test that
/// calls it is gated on that feature; otherwise the test build fails to compile
/// without `alloc`. `is_singular_of` is always available and its test is ungated.
#[cfg(test)]
mod tests {
    use super::*;

    /// Plurals resolved through the irregular table.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_irregulars() {
        assert_eq!(singularize("children"), "child");
        assert_eq!(singularize("people"), "person");
        assert_eq!(singularize("mice"), "mouse");
        assert_eq!(singularize("feet"), "foot");
        assert_eq!(singularize("teeth"), "tooth");
        assert_eq!(singularize("geese"), "goose");
        assert_eq!(singularize("men"), "man");
        assert_eq!(singularize("women"), "woman");
        assert_eq!(singularize("oxen"), "ox");
        assert_eq!(singularize("dice"), "die");
        assert_eq!(singularize("indices"), "index");
        assert_eq!(singularize("vertices"), "vertex");
        assert_eq!(singularize("matrices"), "matrix");
        assert_eq!(singularize("criteria"), "criterion");
        assert_eq!(singularize("phenomena"), "phenomenon");
        assert_eq!(singularize("data"), "datum");
        assert_eq!(singularize("media"), "medium");
    }

    /// `-ies` plurals whose singular ends in `-ie` (handled by the exception list).
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ie_plurals() {
        assert_eq!(singularize("movies"), "movie");
        assert_eq!(singularize("cookies"), "cookie");
        assert_eq!(singularize("pies"), "pie");
        assert_eq!(singularize("ties"), "tie");
        assert_eq!(singularize("brownies"), "brownie");
        assert_eq!(singularize("rookies"), "rookie");
        assert_eq!(singularize("selfies"), "selfie");
    }

    /// Uncountable nouns come back unchanged.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_uncountable() {
        assert_eq!(singularize("sheep"), "sheep");
        assert_eq!(singularize("fish"), "fish");
        assert_eq!(singularize("deer"), "deer");
        assert_eq!(singularize("moose"), "moose");
        assert_eq!(singularize("series"), "series");
        assert_eq!(singularize("species"), "species");
        assert_eq!(singularize("news"), "news");
        assert_eq!(singularize("software"), "software");
    }

    /// `-ies` → `-y`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ies_to_y() {
        assert_eq!(singularize("dependencies"), "dependency");
        assert_eq!(singularize("categories"), "category");
        assert_eq!(singularize("stories"), "story");
        assert_eq!(singularize("cities"), "city");
        assert_eq!(singularize("parties"), "party");
        assert_eq!(singularize("queries"), "query");
        assert_eq!(singularize("policies"), "policy");
        assert_eq!(singularize("ponies"), "pony");
        assert_eq!(singularize("babies"), "baby");
    }

    /// `-ves` → `-f`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ves_to_f() {
        assert_eq!(singularize("wolves"), "wolf");
        assert_eq!(singularize("halves"), "half");
        assert_eq!(singularize("shelves"), "shelf");
        assert_eq!(singularize("leaves"), "leaf");
        assert_eq!(singularize("calves"), "calf");
    }

    /// `-ves` → `-fe`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_ves_to_fe() {
        assert_eq!(singularize("knives"), "knife");
        assert_eq!(singularize("wives"), "wife");
        assert_eq!(singularize("lives"), "life");
    }

    /// `-es` dropped after sibilants, including the doubled-`z` case.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_es_sibilants() {
        assert_eq!(singularize("boxes"), "box");
        assert_eq!(singularize("matches"), "match");
        assert_eq!(singularize("watches"), "watch");
        assert_eq!(singularize("dishes"), "dish");
        assert_eq!(singularize("bushes"), "bush");
        assert_eq!(singularize("classes"), "class");
        assert_eq!(singularize("buses"), "bus");
        assert_eq!(singularize("quizzes"), "quiz");
    }

    /// `-oes` → `-o`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_oes_to_o() {
        assert_eq!(singularize("heroes"), "hero");
        assert_eq!(singularize("potatoes"), "potato");
        assert_eq!(singularize("tomatoes"), "tomato");
        assert_eq!(singularize("echoes"), "echo");
    }

    /// Plain `-s` plurals.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_simple_s() {
        assert_eq!(singularize("items"), "item");
        assert_eq!(singularize("samples"), "sample");
        assert_eq!(singularize("users"), "user");
        assert_eq!(singularize("configs"), "config");
        assert_eq!(singularize("servers"), "server");
        assert_eq!(singularize("handlers"), "handler");
    }

    /// `is_singular_of` does not need `alloc`, so this test always runs.
    #[test]
    fn test_is_singular_of() {
        // Irregulars
        assert!(is_singular_of("child", "children"));
        assert!(is_singular_of("person", "people"));
        assert!(is_singular_of("mouse", "mice"));

        // Standard rules
        assert!(is_singular_of("dependency", "dependencies"));
        assert!(is_singular_of("box", "boxes"));
        assert!(is_singular_of("item", "items"));
        assert!(is_singular_of("wolf", "wolves"));
        assert!(is_singular_of("knife", "knives"));
        assert!(is_singular_of("movie", "movies"));
        assert!(is_singular_of("cookie", "cookies"));
        assert!(is_singular_of("pie", "pies"));
        assert!(is_singular_of("tie", "ties"));

        // Uncountable
        assert!(is_singular_of("sheep", "sheep"));
        assert!(is_singular_of("fish", "fish"));

        // Non-matches
        assert!(!is_singular_of("cat", "dogs"));
        assert!(!is_singular_of("dependency", "items"));
    }

    /// Words that don't end in a plural suffix are returned as-is.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_already_singular() {
        assert_eq!(singularize("config"), "config");
        assert_eq!(singularize("item"), "item");
    }
}