libime_history_merge/
data_bytes.rs

1use serde::{de::Visitor, Deserialize};
2
3use crate::{
4    data::{History, Pool, Sentence, Word},
5    de::StringVisitor,
6    de_bytes::ByteSequenceVisitor,
7    from_bytes, Result,
8};
9
/// File magic expected at the start of a history blob.
///
/// The first four bytes of the blob, decoded as a big-endian `u32`, must equal this value.
pub const MAGIC: u32 = 0x000F_C315;
/// The only format version accepted when deserializing a history blob.
///
/// It is compared against the second big-endian `u32` of the blob.
pub const FORMAT_VERSION: u32 = 2;
12
13impl History {
14    /// Load a history object from a [`libime`][libime]-compatible user history blob.  The format
15    /// is described as follows:
16    ///
17    /// * The blob begins with a 4-byte **file magic** [`00 0f c3 15`][MAGIC], then a 4-byte
18    ///   **format version** [`00 00 00 02`][FORMAT_VERSION], followed by 3 **pool**s.
19    /// * Each **pool** begins with a 4-byte **size** specifying the number of **sentence**(s)
20    ///   inside this **pool**, followed by the **pool**'s **sentence**(s).
21    /// * Each **sentence** begins with a 4-byte **size** specifying the number of **word**(s)
22    ///   inside this **sentence**, followed by the **sentence**'s **word**(s).
23    /// * Each **word** begins with a 4-byte **size** specifying the number of
24    ///   [nibbles][nibble-wiki] that the UTF-8 encoded representation of this word occupies,
25    ///   followed by the **word**'s [nibbles][nibble-wiki].
26    ///
27    /// The **file magic**, **format version**, **size**s are all [big-endian][endianness-wiki]
28    /// encoded.
29    ///
30    /// [file-magic]: crate::data_bytes::MAGIC
31    /// [format-version]: crate::data_bytes::FORMAT_VERSION
32    /// [libime]: <https://github.com/fcitx/libme>
33    /// [endianness-wiki]: <https://en.wikipedia.org/wiki/Endianness>
34    /// [nibble-wiki]: <https://en.wikipedia.org/wiki/Nibble>
35    pub fn load_from_bytes(content: &[u8]) -> Result<Self> {
36        let ret: HistoryFromBytes = from_bytes(content)?;
37        Ok(History::from(ret))
38    }
39}
40
/// A single word as decoded from a history blob.
///
/// The payload is a `String` because it is read from a dumped `user.history`, so it must be
/// valid UTF-8.
#[derive(Debug, Clone, Default, PartialEq, PartialOrd)]
pub struct WordFromBytes(pub String);
46
47impl From<WordFromBytes> for Word {
48    fn from(wfb: WordFromBytes) -> Self {
49        Word(wfb.0)
50    }
51}
52
53impl<'de> Deserialize<'de> for WordFromBytes {
54    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
55    where
56        D: serde::Deserializer<'de>,
57    {
58        Ok(WordFromBytes(
59            deserializer.deserialize_string(StringVisitor)?,
60        ))
61    }
62}
63
64#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
65pub struct SentenceFromBytes(pub Vec<WordFromBytes>);
66
67impl From<SentenceFromBytes> for Sentence {
68    fn from(sfb: SentenceFromBytes) -> Self {
69        Self(sfb.0.into_iter().map(Word::from).collect())
70    }
71}
72
73impl<'de> Deserialize<'de> for SentenceFromBytes {
74    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
75    where
76        D: serde::Deserializer<'de>,
77    {
78        Ok(SentenceFromBytes(
79            deserializer.deserialize_seq(ByteSequenceVisitor::new())?,
80        ))
81    }
82}
83
84#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
85pub struct PoolFromBytes(pub Vec<SentenceFromBytes>);
86
87impl From<PoolFromBytes> for Pool {
88    fn from(pfb: PoolFromBytes) -> Self {
89        Pool(pfb.0.into_iter().map(Sentence::from).collect())
90    }
91}
92
93impl<'de> Deserialize<'de> for PoolFromBytes {
94    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
95    where
96        D: serde::Deserializer<'de>,
97    {
98        let newest_first: Vec<SentenceFromBytes> =
99            (deserializer.deserialize_seq(ByteSequenceVisitor::new())? as Vec<SentenceFromBytes>)
100                .iter()
101                .rev()
102                .cloned()
103                .collect();
104        Ok(PoolFromBytes(newest_first))
105    }
106}
107
108#[derive(Clone, Debug, PartialEq, PartialOrd)]
109pub struct HistoryFromBytes {
110    pub magic: u32,
111    pub format_version: u32,
112    pub pools: Vec<PoolFromBytes>,
113}
114
115impl From<HistoryFromBytes> for History {
116    fn from(hfb: HistoryFromBytes) -> Self {
117        History {
118            magic: hfb.magic,
119            format_version: hfb.format_version,
120            pools: hfb.pools.into_iter().map(Pool::from).collect(),
121        }
122    }
123}
124
125impl<'de> Deserialize<'de> for HistoryFromBytes {
126    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
127    where
128        D: serde::Deserializer<'de>,
129    {
130        struct HistoryVisitor;
131        impl<'de> Visitor<'de> for HistoryVisitor {
132            type Value = HistoryFromBytes;
133            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
134                formatter.write_str(
135                    "4 bytes of u32, then another 4 bytes of u32, then an array of pools (bincode)",
136                )
137            }
138            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
139            where
140                A: serde::de::SeqAccess<'de>,
141            {
142                // Load magic bytes
143                let mut magic_bytes: Vec<u8> = Vec::new();
144                for _ in 0..4 {
145                    magic_bytes.push(seq.next_element()?.unwrap());
146                }
147                let magic = u32::from_be_bytes(magic_bytes.try_into().unwrap());
148                if magic != MAGIC {
149                    return Err(serde::de::Error::custom(format!(
150                        "Invalid history magic (expected 0x{:08x}, got 0x{:08x})",
151                        MAGIC, magic,
152                    )));
153                }
154
155                let mut format_version_bytes: Vec<u8> = Vec::new();
156                for _ in 0..4 {
157                    format_version_bytes.push(seq.next_element()?.unwrap());
158                }
159                let format_version = u32::from_be_bytes(format_version_bytes.try_into().unwrap());
160                if format_version != FORMAT_VERSION {
161                    return Err(serde::de::Error::custom(format!(
162                        "Invalid format version (expected 0x{:08x}, got 0x{:08x})",
163                        FORMAT_VERSION, format_version,
164                    )));
165                }
166
167                let pools = ByteSequenceVisitor::new().visit_seq(seq)?;
168
169                Ok(HistoryFromBytes {
170                    magic,
171                    format_version,
172                    pools,
173                })
174            }
175        }
176
177        deserializer.deserialize_struct("", &[""], HistoryVisitor)
178    }
179}
180
181// Author: Blurgy <gy@blurgy.xyz>
182// Date:   Feb 03 2022, 11:45 [CST]