Skip to main content

wubi/
encode.rs

1//! High-level encoder API over [`crate::codec`]: zero-alloc `encode_into`
2//! plus an ergonomic `encode` that returns a stack [`EncodedCode`].
3//!
4//! The runtime lookup goes through [`crate::zigen::lookup`]. Build-time
5//! callers and tests should use [`crate::codec::encode_with_lookup`]
6//! directly with their own lookup closure.
7
8use core::fmt;
9
10use crate::codec::{EncodeError, encode_with_lookup};
11use crate::decomp::Decomp;
12use crate::zigen;
13
/// Stack-allocated Wubi code (3 or 4 ASCII letters).
///
/// Only the first `len` bytes of `bytes` carry the code; anything after that
/// is padding with no defined content. Equality is therefore defined over the
/// populated prefix only, so two values that print the same always compare
/// equal regardless of what the padding happens to hold.
#[derive(Copy, Clone)]
pub struct EncodedCode {
    bytes: [u8; 4],
    len: u8,
}

impl EncodedCode {
    /// Borrow the populated prefix (3 or 4 bytes for an encoded character).
    /// Bytes of the internal `[u8; 4]` past `len()` are padding and are never
    /// exposed through this slice.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        // `usize::from` is the lossless widening; no `as` cast needed.
        &self.bytes[..usize::from(self.len)]
    }
    /// Borrow as `&str`. Always valid UTF-8 (encoder only emits ASCII).
    #[inline]
    pub fn as_str(&self) -> &str {
        let code = self.as_bytes();
        // Catch a violated invariant in debug builds before it becomes UB.
        debug_assert!(code.is_ascii(), "EncodedCode must contain ASCII only");
        // SAFETY: the encoder writes only ASCII letters (a–z, including the
        // 'l' filler used for single-stroke characters), and every ASCII
        // byte sequence is valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(code) }
    }
    /// Number of populated code bytes — always 3 or 4 for a successfully-
    /// encoded character.
    #[inline]
    pub fn len(&self) -> usize {
        usize::from(self.len)
    }
    /// `true` iff `len() == 0`. Successful encodes never return an empty
    /// code, so this is only `true` for a value the encoder has not filled.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

impl PartialEq for EncodedCode {
    /// Compares the populated prefixes only; padding bytes are ignored.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl Eq for EncodedCode {}
47
48impl fmt::Debug for EncodedCode {
49    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50        write!(f, "EncodedCode({:?})", self.as_str())
51    }
52}
53
54impl fmt::Display for EncodedCode {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        f.write_str(self.as_str())
57    }
58}
59
60/// Zero-allocation encode. Writes the code into `out` and returns the
61/// number of bytes written (3 or 4).
62#[inline]
63pub fn encode_into(decomp: &Decomp, out: &mut [u8; 4]) -> Result<usize, EncodeError> {
64    encode_with_lookup(&decomp.as_ref(), zigen::lookup, out)
65}
66
67/// Convenience: encode into a stack-allocated [`EncodedCode`].
68#[inline]
69pub fn encode(decomp: &Decomp) -> Result<EncodedCode, EncodeError> {
70    let mut out = [0u8; 4];
71    let n = encode_into(decomp, &mut out)?;
72    Ok(EncodedCode {
73        bytes: out,
74        len: n as u8,
75    })
76}
77
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codec::{Shape, Stroke};
    use crate::decomp::embedded_seed;

    /// Every seed character listed in `JIANMING_ZIGEN` must encode to a single
    /// letter repeated four times, and exactly 25 such characters must exist.
    #[test]
    fn encodes_jianming_as_letter_x4() {
        let mut matched = 0;
        for (ch, decomp) in embedded_seed() {
            if crate::codec::JIANMING_ZIGEN.contains(ch) {
                let code = encode(&decomp).expect("encode failed");
                assert_eq!(code.len(), 4, "{ch}");
                // Length was just checked, so indexing the first byte is safe.
                let first = code.as_bytes()[0];
                assert!(
                    code.as_bytes().iter().all(|&b| b == first),
                    "{ch} → {} should be letter ×4",
                    code
                );
                matched += 1;
            }
        }
        assert_eq!(matched, 25);
    }

    /// The five single-stroke characters encode as their key letter doubled,
    /// followed by "ll".
    #[test]
    fn encodes_dan_bi_hua() {
        let cases: &[(char, &str, Stroke)] = &[
            ('一', "ggll", Stroke::Heng),
            ('丨', "hhll", Stroke::Shu),
            ('丿', "ttll", Stroke::Pie),
            ('丶', "yyll", Stroke::Na),
            ('乙', "nnll", Stroke::Zhe),
        ];
        for &(ch, expected, stroke) in cases {
            let decomp = Decomp {
                zigen: vec![ch],
                strokes: vec![stroke],
                shape: Shape::Whole,
            };
            let code = encode(&decomp).unwrap();
            assert_eq!(code.as_str(), expected, "{ch} mismatch");
        }
    }

    /// Spot-check a handful of seed characters against their known codes.
    #[test]
    fn encodes_specific_jianming() {
        use std::collections::HashMap;

        let by_char = embedded_seed().into_iter().collect::<HashMap<char, _>>();
        let expectations = [
            ('王', "gggg"),
            ('土', "ffff"),
            ('大', "dddd"),
            ('禾', "tttt"),
            ('纟', "xxxx"),
        ];
        for (ch, expected) in expectations {
            let decomp = by_char.get(&ch).unwrap();
            let code = encode(decomp).unwrap();
            assert_eq!(code.as_str(), expected);
        }
    }

    /// Sanity check of the buffer-based entry point: `encode_into` fills the
    /// caller's stack buffer and reports how many bytes it wrote.
    #[test]
    fn encoder_does_not_allocate_on_zero_alloc_path() {
        let decomp = Decomp {
            zigen: vec!['王'],
            strokes: vec![Stroke::Heng],
            shape: Shape::Whole,
        };
        let mut out = [0u8; 4];
        let written = encode_into(&decomp, &mut out).unwrap();
        assert_eq!(&out[..written], b"gggg");
    }
}