Skip to main content

inputx_pinyin/
fuzzy.rs

1//! Fuzzy syllable expansion — `z↔zh`, `c↔ch`, `s↔sh`, `n↔l`, `f↔h`, `r↔l`,
2//! `in↔ing`, `en↔eng`, `an↔ang`. Toggleable per-pair so users can match
3//! their own dialect / typing habits. Expansion happens at lookup time;
4//! the dictionary stays canonical (no bloat).
5//!
//! v0.1 wires the config + expansion machinery; the engine consults it at
//! lookup. Every alternate is produced by exactly one rule application, so a
//! syllable may gain several alternates (one per applicable rule) but never
//! a form that combines two rules. Multi-rule cascades (e.g.,
//! `zin → zhin → zhing`) are out of scope for v0.1; the long tail is
//! addressed in v0.3 once L0 ranking can demote noise.
11
/// Toggleable fuzzy-pair flags. All off by default.
///
/// Each flag enables one confusable pair in both directions. The type is a
/// plain bundle of `bool`s, so it is `Copy` and can be compared or hashed
/// (e.g. to detect that a user's settings changed).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct FuzzyConfig {
    /// `z` ↔ `zh` (initial)
    pub z_zh: bool,
    /// `c` ↔ `ch` (initial)
    pub c_ch: bool,
    /// `s` ↔ `sh` (initial)
    pub s_sh: bool,
    /// `n` ↔ `l` (initial)
    pub n_l: bool,
    /// `f` ↔ `h` (initial)
    pub f_h: bool,
    /// `r` ↔ `l` (initial)
    pub r_l: bool,
    /// `in` ↔ `ing` (final)
    pub in_ing: bool,
    /// `en` ↔ `eng` (final)
    pub en_eng: bool,
    /// `an` ↔ `ang` (final)
    pub an_ang: bool,
}
34
35impl FuzzyConfig {
36    /// All pairs off — strict pinyin matching.
37    pub const fn strict() -> Self {
38        Self {
39            z_zh: false,
40            c_ch: false,
41            s_sh: false,
42            n_l: false,
43            f_h: false,
44            r_l: false,
45            in_ing: false,
46            en_eng: false,
47            an_ang: false,
48        }
49    }
50
51    /// All pairs on — common dialect-tolerant default for southern speakers.
52    pub const fn permissive() -> Self {
53        Self {
54            z_zh: true,
55            c_ch: true,
56            s_sh: true,
57            n_l: true,
58            f_h: true,
59            r_l: true,
60            in_ing: true,
61            en_eng: true,
62            an_ang: true,
63        }
64    }
65
66    /// Returns the canonical syllable plus any fuzzy alternates this config
67    /// permits. The result always begins with `syl` itself; alternates are
68    /// only added when the rule applies.
69    ///
70    /// Single-rule expansion only — does not cascade. `s.iter().count()`
71    /// is between 1 and ~3 in practice.
72    pub fn expand(&self, syl: &str) -> Vec<String> {
73        let mut out = vec![syl.to_string()];
74
75        // Initial-position swaps: produce the partner form, leaving the rest
76        // of the syllable intact.
77        let initial_swaps: &[(bool, &str, &str)] = &[
78            (self.z_zh, "zh", "z"),
79            (self.z_zh, "z", "zh"),
80            (self.c_ch, "ch", "c"),
81            (self.c_ch, "c", "ch"),
82            (self.s_sh, "sh", "s"),
83            (self.s_sh, "s", "sh"),
84            (self.n_l, "n", "l"),
85            (self.n_l, "l", "n"),
86            (self.f_h, "f", "h"),
87            (self.f_h, "h", "f"),
88            (self.r_l, "r", "l"),
89            (self.r_l, "l", "r"),
90        ];
91        for (on, from, to) in initial_swaps {
92            if *on && let Some(rest) = syl.strip_prefix(from) {
93                // Avoid the n↔l double-rule producing nonsense by only
94                // applying if the prefix isn't simultaneously the partner of
95                // another active rule (e.g., r→l shouldn't fire on "ri" as if
96                // "li" were a more natural read; both are valid syllables).
97                let mut alt = String::with_capacity(syl.len());
98                alt.push_str(to);
99                alt.push_str(rest);
100                if alt != syl && !out.contains(&alt) {
101                    out.push(alt);
102                }
103            }
104        }
105
106        // Final-position swaps.
107        let final_swaps: &[(bool, &str, &str)] = &[
108            (self.in_ing, "ing", "in"),
109            (self.in_ing, "in", "ing"),
110            (self.en_eng, "eng", "en"),
111            (self.en_eng, "en", "eng"),
112            (self.an_ang, "ang", "an"),
113            (self.an_ang, "an", "ang"),
114        ];
115        for (on, from, to) in final_swaps {
116            if *on && syl.ends_with(from) {
117                let head = &syl[..syl.len() - from.len()];
118                let mut alt = String::with_capacity(syl.len());
119                alt.push_str(head);
120                alt.push_str(to);
121                if alt != syl && !out.contains(&alt) {
122                    out.push(alt);
123                }
124            }
125        }
126
127        out
128    }
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `forms` holds an entry equal to `want`.
    fn has(forms: &[String], want: &str) -> bool {
        forms.iter().any(|form| form == want)
    }

    /// Builds a config from the all-off default, adjusted by `tweak`.
    fn config_with(tweak: impl FnOnce(&mut FuzzyConfig)) -> FuzzyConfig {
        let mut cfg = FuzzyConfig::default();
        tweak(&mut cfg);
        cfg
    }

    /// The derived default enables nothing, so only the input comes back.
    #[test]
    fn default_is_strict() {
        let expanded = FuzzyConfig::default().expand("zhong");
        assert_eq!(expanded, vec!["zhong"]);
    }

    /// `z_zh` maps `zh…` to `z…` and `z…` to `zh…`.
    #[test]
    fn z_zh_swap_both_directions() {
        let cfg = config_with(|c| c.z_zh = true);

        let from_digraph = cfg.expand("zhong");
        assert!(has(&from_digraph, "zhong"));
        assert!(has(&from_digraph, "zong"));

        let from_single = cfg.expand("zai");
        assert!(has(&from_single, "zhai"));
    }

    /// Enabling one final pair leaves the other finals alone.
    #[test]
    fn final_swaps_independent() {
        let cfg = config_with(|c| c.in_ing = true);

        assert!(has(&cfg.expand("xing"), "xin"));
        assert!(has(&cfg.expand("xin"), "xing"));
        // `-an` belongs to a different (disabled) pair.
        assert_eq!(cfg.expand("xian"), vec!["xian"]);
    }

    /// The input syllable is always the first entry of the result.
    #[test]
    fn permissive_includes_canonical_first() {
        let expanded = FuzzyConfig::permissive().expand("zhong");
        assert_eq!(expanded[0], "zhong");
        // The z_zh pair contributes the `z-` form; `-ong` has no rule.
        assert!(has(&expanded, "zong"));
    }

    /// v0.1 applies one rule per alternate — rules are never chained.
    #[test]
    fn initial_and_final_compose_only_singly() {
        let out = FuzzyConfig::permissive().expand("zin");

        // One initial rule alone gives "zhin"; one final rule alone gives
        // "zing".
        assert!(has(&out, "zhin"), "expected single z→zh: {out:?}");
        assert!(has(&out, "zing"), "expected single in→ing: {out:?}");

        // Both rules together would give "zhing", which must not appear.
        assert!(!has(&out, "zhing"), "cascade leaked: {out:?}");
    }
}