libime_history_merge/
de_text.rs

1use std::marker::PhantomData;
2
3use serde::de::{self, SeqAccess, Visitor};
4use serde::{Deserialize, Deserializer};
5
6use crate::{Error, Result};
7
/// Cursor over a borrowed byte buffer.
///
/// The buffer is consumed from the front: every read shrinks `input` so that
/// it always starts at the next unread byte.
#[derive(Debug)]
pub struct TextDeserializer<'de> {
    /// Bytes that have not been consumed yet.
    input: &'de [u8],
}
11
12impl<'de> TextDeserializer<'de> {
13    pub fn new(input: &'de [u8]) -> Self {
14        Self { input }
15    }
16
17    pub fn ended(&self) -> bool {
18        self.input.len() == 0
19    }
20
21    /// Returns the byte under cursor without consuming it
22    pub fn peek_byte(&self) -> Result<u8> {
23        if self.input.len() > 0 {
24            Ok(self.input[0])
25        } else {
26            Err(Error::EofError)
27        }
28    }
29
30    /// Consumes next `len` bytes from input and return it
31    pub fn next_exact_bytes(&mut self, len: usize) -> Result<&[u8]> {
32        if len > self.input.len() {
33            Err(Error::EofError)
34        } else {
35            let slce = &self.input[..len];
36            self.input = &self.input[len..];
37            Ok(slce)
38        }
39    }
40
41    /// Consumes until one of `candidate_chars` is occurred in input and return the visited bytes
42    pub fn pop_until(&mut self, candidate_chars: &[u8]) -> Result<&[u8]> {
43        let mut len: usize = 0;
44        while len < self.input.len() {
45            if candidate_chars.iter().any(|ch| ch == &self.input[len]) {
46                break;
47            }
48            len += 1;
49        }
50        let slce = &self.input[..len];
51        self.input = &self.input[len..];
52        Ok(slce)
53    }
54
55    /// Load next word, words are delimetered by space or new line character
56    pub fn next_word(&mut self) -> Result<String> {
57        let ret = String::from_utf8(self.pop_until(&[b' ', b'\n'])?.into())?;
58        Ok(ret)
59    }
60}
61
62pub fn from_text<'de, T>(b: &'de [u8]) -> Result<T>
63where
64    T: Deserialize<'de>,
65{
66    let mut deserializer = TextDeserializer::new(b);
67    let t = T::deserialize(&mut deserializer)?;
68
69    Ok(t)
70}
71
72impl<'de, 'a> Deserializer<'de> for &'a mut TextDeserializer<'de> {
73    type Error = Error;
74
75    fn deserialize_any<V>(self, _visitor: V) -> Result<V::Value>
76    where
77        V: Visitor<'de>,
78    {
79        unimplemented!()
80    }
81
82    fn deserialize_bool<V>(self, _visitor: V) -> Result<V::Value>
83    where
84        V: Visitor<'de>,
85    {
86        unimplemented!()
87    }
88
89    fn deserialize_i8<V>(self, _visitor: V) -> Result<V::Value>
90    where
91        V: Visitor<'de>,
92    {
93        unimplemented!()
94    }
95
96    fn deserialize_i16<V>(self, _visitor: V) -> Result<V::Value>
97    where
98        V: Visitor<'de>,
99    {
100        unimplemented!()
101    }
102
103    fn deserialize_i32<V>(self, _visitor: V) -> Result<V::Value>
104    where
105        V: Visitor<'de>,
106    {
107        unimplemented!()
108    }
109
110    fn deserialize_i64<V>(self, _visitor: V) -> Result<V::Value>
111    where
112        V: Visitor<'de>,
113    {
114        unimplemented!()
115    }
116
117    fn deserialize_u8<V>(self, _visitor: V) -> Result<V::Value>
118    where
119        V: Visitor<'de>,
120    {
121        unimplemented!()
122    }
123
124    fn deserialize_u16<V>(self, _visitor: V) -> Result<V::Value>
125    where
126        V: Visitor<'de>,
127    {
128        unimplemented!()
129    }
130
131    fn deserialize_u32<V>(self, _visitor: V) -> Result<V::Value>
132    where
133        V: Visitor<'de>,
134    {
135        unimplemented!()
136    }
137
138    fn deserialize_u64<V>(self, _visitor: V) -> Result<V::Value>
139    where
140        V: Visitor<'de>,
141    {
142        unimplemented!()
143    }
144
145    fn deserialize_f32<V>(self, _visitor: V) -> Result<V::Value>
146    where
147        V: Visitor<'de>,
148    {
149        unimplemented!()
150    }
151
152    fn deserialize_f64<V>(self, _visitor: V) -> Result<V::Value>
153    where
154        V: Visitor<'de>,
155    {
156        unimplemented!()
157    }
158
159    fn deserialize_char<V>(self, _visitor: V) -> Result<V::Value>
160    where
161        V: Visitor<'de>,
162    {
163        unimplemented!()
164    }
165
166    fn deserialize_str<V>(self, _visitor: V) -> Result<V::Value>
167    where
168        V: Visitor<'de>,
169    {
170        unimplemented!()
171    }
172
173    /// For WordFromText
174    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
175    where
176        V: Visitor<'de>,
177    {
178        visitor.visit_string(self.next_word()?)
179    }
180
181    fn deserialize_bytes<V>(self, _visitor: V) -> Result<V::Value>
182    where
183        V: Visitor<'de>,
184    {
185        unimplemented!()
186    }
187
188    fn deserialize_byte_buf<V>(self, _visitor: V) -> Result<V::Value>
189    where
190        V: Visitor<'de>,
191    {
192        unimplemented!()
193    }
194
195    fn deserialize_option<V>(self, _visitor: V) -> Result<V::Value>
196    where
197        V: Visitor<'de>,
198    {
199        unimplemented!()
200    }
201
202    fn deserialize_unit<V>(self, _visitor: V) -> Result<V::Value>
203    where
204        V: Visitor<'de>,
205    {
206        unimplemented!()
207    }
208
209    fn deserialize_unit_struct<V>(self, _name: &'static str, _visitor: V) -> Result<V::Value>
210    where
211        V: Visitor<'de>,
212    {
213        unimplemented!()
214    }
215
216    fn deserialize_newtype_struct<V>(self, _name: &'static str, _visitor: V) -> Result<V::Value>
217    where
218        V: Visitor<'de>,
219    {
220        unimplemented!()
221    }
222
223    /// For SentenceFromText
224    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
225    where
226        V: Visitor<'de>,
227    {
228        visitor.visit_seq(SpaceSeparated::new(self))
229    }
230
231    fn deserialize_tuple<V>(self, _len: usize, _visitor: V) -> Result<V::Value>
232    where
233        V: Visitor<'de>,
234    {
235        unimplemented!()
236    }
237
238    fn deserialize_tuple_struct<V>(
239        self,
240        _name: &'static str,
241        _len: usize,
242        _visitor: V,
243    ) -> Result<V::Value>
244    where
245        V: Visitor<'de>,
246    {
247        unimplemented!()
248    }
249
250    fn deserialize_map<V>(self, _visitor: V) -> Result<V::Value>
251    where
252        V: Visitor<'de>,
253    {
254        // visitor.visit_map(HistoryHelperStruct::new(self))
255        unimplemented!()
256    }
257
258    fn deserialize_struct<V>(
259        self,
260        _name: &'static str,
261        _fields: &'static [&'static str],
262        _visitor: V,
263    ) -> Result<V::Value>
264    where
265        V: Visitor<'de>,
266    {
267        unimplemented!()
268    }
269
270    fn deserialize_enum<V>(
271        self,
272        _name: &'static str,
273        _variants: &'static [&'static str],
274        _visitor: V,
275    ) -> Result<V::Value>
276    where
277        V: Visitor<'de>,
278    {
279        unimplemented!()
280    }
281
282    fn deserialize_identifier<V>(self, _visitor: V) -> Result<V::Value>
283    where
284        V: Visitor<'de>,
285    {
286        unimplemented!()
287    }
288
289    fn deserialize_ignored_any<V>(self, _visitor: V) -> Result<V::Value>
290    where
291        V: Visitor<'de>,
292    {
293        unimplemented!()
294    }
295}
296
297struct SpaceSeparated<'a, 'de: 'a> {
298    de: &'a mut TextDeserializer<'de>,
299    first: bool,
300}
301
302impl<'a, 'de> SpaceSeparated<'a, 'de> {
303    pub fn new(de: &'a mut TextDeserializer<'de>) -> Self {
304        Self { de, first: true }
305    }
306}
307
308impl<'a, 'de> SeqAccess<'de> for SpaceSeparated<'a, 'de> {
309    type Error = Error;
310
311    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
312    where
313        T: de::DeserializeSeed<'de>,
314    {
315        if self.de.peek_byte().is_err() {
316            return Ok(None);
317        }
318        if !self.first && self.de.next_exact_bytes(1)? != &[b' '] {
319            // return Err(Error::DeserializeError(
320            //     "Expected space character".to_string(),
321            // ));
322            return Ok(None);
323        }
324        self.first = false;
325        seed.deserialize(&mut *self.de).map(Some)
326    }
327}
328
329pub(crate) struct SpaceSeparatedVisitor<'de, ElementType>(PhantomData<&'de ElementType>)
330where
331    ElementType: Deserialize<'de>;
332
333impl<'de, ElementType> SpaceSeparatedVisitor<'de, ElementType>
334where
335    ElementType: Deserialize<'de>,
336{
337    pub fn new() -> Self {
338        Self(PhantomData)
339    }
340}
341
342impl<'de, ElementType> Visitor<'de> for SpaceSeparatedVisitor<'de, ElementType>
343where
344    ElementType: Deserialize<'de>,
345{
346    type Value = Vec<ElementType>;
347
348    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
349        formatter.write_str("space (' ') separated UTF-8 strings")
350    }
351
352    fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
353    where
354        A: SeqAccess<'de>,
355    {
356        let mut ret: Vec<ElementType> = Vec::new();
357        loop {
358            let value = seq.next_element()?;
359            match value {
360                Some(value) => ret.push(value),
361                None => break Ok(ret),
362            };
363        }
364    }
365}
366
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use crate::{
        data::{Sentence, Word},
        data_text::{SentenceFromText, WordFromText},
        from_text, Result,
    };

    /// A newline-terminated word is read without its trailing newline.
    #[test]
    fn word() -> Result<()> {
        let parsed: WordFromText = from_text("音乐\n".as_bytes())?;
        assert_eq!(Word::from(parsed), Word("音乐".to_string()));
        Ok(())
    }

    /// A newline-terminated line of space separated words is read as one
    /// sentence made of those words.
    #[test]
    fn sentence() -> Result<()> {
        let expected_sentence = Sentence(
            ["音乐", "好听"]
                .iter()
                .map(|word| Word(String::from(*word)))
                .collect(),
        );
        let parsed: SentenceFromText = from_text("音乐 好听\n".as_bytes())?;
        assert_eq!(Sentence::from(parsed), expected_sentence);
        Ok(())
    }
}