Skip to main content

prosaic_core/
quantify.rs

//! Quantifier naturalization.
//!
//! Turn raw integers into natural quantifier phrases. `0` becomes
//! "no", `1` becomes "a single", small counts stay exact (spelled for
//! the smallest values, digits for the rest), and large counts get
//! hedged ("about N" with N rounded to a natural bucket, "over a
//! hundred", "hundreds of", "thousands of"). The pipe form is
//! `{count|quantify}` with optional `:exact` and `:hedged` flavours.
9
10#[cfg(not(feature = "std"))]
11use alloc::format;
12#[cfg(not(feature = "std"))]
13use alloc::string::{String, ToString};
14
15use crate::language::Language;
16
/// How the quantifier should be framed.
///
/// [`QuantifyMode::Natural`] is the [`Default`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum QuantifyMode {
    /// Natural default: 0 → "no", 1 → "a single", small numbers spelled
    /// out or kept as digits as appropriate, large numbers hedged to
    /// natural buckets.
    #[default]
    Natural,
    /// Always emit a digit count. Still handles 0/1 specially ("no",
    /// "a single") because those read better even in exact contexts.
    Exact,
    /// Always hedge, even for small counts ("a couple of", "a few",
    /// "a handful of"). Useful when counts come from noisy measurements.
    Hedged,
}
32
33/// Produce a natural-language quantifier for `count` using the given
34/// `mode`. The language is used to spell out small numbers ("two",
35/// "three", …) via its `number_to_words` method.
36pub fn quantify(count: i64, mode: QuantifyMode, lang: &dyn Language) -> String {
37    match mode {
38        QuantifyMode::Exact => exact(count),
39        QuantifyMode::Hedged => hedged(count),
40        QuantifyMode::Natural => natural(count, lang),
41    }
42}
43
/// Exact-mode phrasing: "no" for nothing, "a single" for one, and the
/// plain decimal digits for every larger count.
fn exact(count: i64) -> String {
    match count {
        // A negative count has no sensible reading as a quantity, so it is
        // treated as none, matching the natural and hedged modes, instead
        // of leaking a "-5" into the rendered sentence.
        i64::MIN..=0 => "no".to_string(),
        1 => "a single".to_string(),
        _ => count.to_string(),
    }
}
51
/// Hedged-mode phrasing: every count is mapped to a vague bucket, never
/// to a precise number.
///
/// Zero and negative counts both read as "no"; anything from 10,000
/// upwards reads as "tens of thousands of".
fn hedged(count: i64) -> String {
    let phrase = match count {
        // Negative counts are treated as none.
        i64::MIN..=0 => "no",
        1 => "a single",
        2 => "a couple of",
        3..=5 => "a few",
        6..=12 => "a handful of",
        13..=19 => "a dozen or so",
        20..=49 => "dozens of",
        50..=199 => "scores of",
        200..=999 => "hundreds of",
        1000..=9999 => "thousands of",
        // Only counts of 10_000 and above remain.
        _ => "tens of thousands of",
    };
    phrase.to_string()
}
68
69fn natural(count: i64, lang: &dyn Language) -> String {
70    match count {
71        i64::MIN..=0 => "no".to_string(),
72        1 => "a single".to_string(),
73        2..=12 => lang.number_to_words(count as usize),
74        13..=49 => count.to_string(),
75        50..=99 => format!("about {}", round_to(count, 10)),
76        100..=199 => "over a hundred".to_string(),
77        200..=999 => "hundreds of".to_string(),
78        1000..=9999 => "thousands of".to_string(),
79        _ => "tens of thousands of".to_string(),
80    }
81}
82
/// Round `n` to the nearest multiple of `bucket`, with ties rounding
/// towards positive infinity (`55` → `60`, `-55` → `-50`).
///
/// A non-positive `bucket` is meaningless, so `n` is returned unchanged.
/// If the nearest multiple does not fit in an `i64`, the nearest multiple
/// on the other side of `n` is returned instead of overflowing.
fn round_to(n: i64, bucket: i64) -> i64 {
    if bucket <= 0 {
        return n;
    }
    // The Euclidean remainder is always in `0..bucket`, so negative inputs
    // are measured from the multiple *below* them. Truncating division
    // would instead pull them towards zero and round e.g. -56 to -50.
    let rem = n.rem_euclid(bucket);
    // Smallest remainder that rounds up; equals `bucket / 2` rounded up,
    // which matches the classic `(n + bucket / 2) / bucket` for `n >= 0`.
    let round_up_from = bucket - bucket / 2;
    if rem >= round_up_from {
        // `bucket - rem` is in `1..=bucket`; the add only fails next to
        // `i64::MAX`, where the multiple below `n` is still representable.
        n.checked_add(bucket - rem).unwrap_or(n - rem)
    } else {
        // The subtract only fails next to `i64::MIN`, where the multiple
        // above `n` is still representable.
        n.checked_sub(rem).unwrap_or(n + (bucket - rem))
    }
}
90
91/// Parse a quantify spec argument into a mode.
92pub fn parse_mode(spec: &str) -> Option<QuantifyMode> {
93    match spec {
94        "natural" => Some(QuantifyMode::Natural),
95        "exact" => Some(QuantifyMode::Exact),
96        "hedged" => Some(QuantifyMode::Hedged),
97        _ => None,
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::*;
    use crate::language::{Conjunction, Language, Person, Tense};

    /// A minimal language that spells out 0..=12 as English words;
    /// anything larger falls through to digits. Every other trait method
    /// is a trivial stand-in. Enough for these tests.
    struct MiniLang;

    impl Language for MiniLang {
        fn pluralize(&self, word: &str, count: usize) -> String {
            if count == 1 {
                word.to_string()
            } else {
                format!("{word}s")
            }
        }
        fn singularize(&self, word: &str) -> String {
            word.to_string()
        }
        fn article(&self, _word: &str) -> &str {
            "a"
        }
        fn conjugate(&self, verb: &str, _t: Tense, _p: Person) -> String {
            verb.to_string()
        }
        fn past_participle(&self, verb: &str) -> String {
            format!("{verb}ed")
        }
        fn present_participle(&self, verb: &str) -> String {
            format!("{verb}ing")
        }
        fn join_list(&self, items: &[&str], _c: Conjunction) -> String {
            items.join(", ")
        }
        fn ordinal(&self, n: usize) -> String {
            format!("{n}th")
        }
        fn number_to_words(&self, n: usize) -> String {
            let words = [
                "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
                "ten", "eleven", "twelve",
            ];
            if n < words.len() {
                words[n].to_string()
            } else {
                n.to_string()
            }
        }
    }

    fn lang() -> MiniLang {
        MiniLang
    }

    #[test]
    fn natural_zero_is_no() {
        assert_eq!(quantify(0, QuantifyMode::Natural, &lang()), "no");
    }

    #[test]
    fn natural_one_is_a_single() {
        assert_eq!(quantify(1, QuantifyMode::Natural, &lang()), "a single");
    }

    #[test]
    fn natural_small_is_spelled() {
        assert_eq!(quantify(2, QuantifyMode::Natural, &lang()), "two");
        assert_eq!(quantify(3, QuantifyMode::Natural, &lang()), "three");
        assert_eq!(quantify(12, QuantifyMode::Natural, &lang()), "twelve");
    }

    #[test]
    fn natural_medium_is_digit() {
        assert_eq!(quantify(13, QuantifyMode::Natural, &lang()), "13");
        assert_eq!(quantify(47, QuantifyMode::Natural, &lang()), "47");
    }

    #[test]
    fn natural_50s_are_hedged_about_rounded() {
        assert_eq!(quantify(55, QuantifyMode::Natural, &lang()), "about 60");
        assert_eq!(quantify(77, QuantifyMode::Natural, &lang()), "about 80");
    }

    #[test]
    fn natural_about_bucket_boundaries() {
        assert_eq!(quantify(50, QuantifyMode::Natural, &lang()), "about 50");
        assert_eq!(quantify(99, QuantifyMode::Natural, &lang()), "about 100");
    }

    #[test]
    fn natural_hundreds() {
        assert_eq!(
            quantify(150, QuantifyMode::Natural, &lang()),
            "over a hundred"
        );
        assert_eq!(quantify(473, QuantifyMode::Natural, &lang()), "hundreds of");
    }

    #[test]
    fn natural_thousands() {
        assert_eq!(
            quantify(5_000, QuantifyMode::Natural, &lang()),
            "thousands of"
        );
        assert_eq!(
            quantify(25_000, QuantifyMode::Natural, &lang()),
            "tens of thousands of"
        );
    }

    #[test]
    fn exact_mode_uses_digit_for_non_zero_non_one() {
        assert_eq!(quantify(0, QuantifyMode::Exact, &lang()), "no");
        assert_eq!(quantify(1, QuantifyMode::Exact, &lang()), "a single");
        assert_eq!(quantify(47, QuantifyMode::Exact, &lang()), "47");
    }

    #[test]
    fn hedged_mode_uses_buckets_throughout() {
        assert_eq!(quantify(4, QuantifyMode::Hedged, &lang()), "a few");
        assert_eq!(quantify(10, QuantifyMode::Hedged, &lang()), "a handful of");
        assert_eq!(quantify(30, QuantifyMode::Hedged, &lang()), "dozens of");
        assert_eq!(quantify(300, QuantifyMode::Hedged, &lang()), "hundreds of");
    }

    #[test]
    fn hedged_mode_extremes() {
        assert_eq!(quantify(-5, QuantifyMode::Hedged, &lang()), "no");
        assert_eq!(
            quantify(25_000, QuantifyMode::Hedged, &lang()),
            "tens of thousands of"
        );
    }

    #[test]
    fn negative_counts_treated_as_none() {
        assert_eq!(quantify(-5, QuantifyMode::Natural, &lang()), "no");
    }

    #[test]
    fn parse_mode_recognises_known_specs_only() {
        assert_eq!(parse_mode("natural"), Some(QuantifyMode::Natural));
        assert_eq!(parse_mode("exact"), Some(QuantifyMode::Exact));
        assert_eq!(parse_mode("hedged"), Some(QuantifyMode::Hedged));
        assert_eq!(parse_mode("Exact"), None);
        assert_eq!(parse_mode(""), None);
    }
}