ferrous_opencc/
compiler_logic.rs1#[derive(Encode, Decode, Debug)]
2pub struct SerializableFstDict {
3 pub values: Vec<Vec<Arc<str>>>,
4 pub max_key_length: usize,
5}
6
7pub fn compile_dictionary(input_path: &Path) -> Result<Vec<u8>> {
8 let file = File::open(input_path)
9 .with_context(|| format!("Failed to open input dictionary: {}", input_path.display()))?;
10 let reader = BufReader::new(file);
11
12 let mut entries = BTreeMap::new();
13 let mut max_key_length = 0;
14
15 for line in reader.lines() {
16 let line = line.with_context(|| "Failed to read line from dictionary")?;
17 let parts: Vec<&str> = line.split('\t').collect();
18 if parts.len() == 2 {
19 let key = parts[0];
20 let values: Vec<Arc<str>> = parts[1].split(' ').map(|s| s.into()).collect();
21
22 if !key.is_empty() && !values.is_empty() && !values.iter().any(|s| s.is_empty()) {
23 max_key_length = max_key_length.max(key.chars().count());
24 entries.insert(key.to_string(), values);
25 }
26 }
27 }
28
29 let mut values_vec = Vec::with_capacity(entries.len());
30 let mut builder = MapBuilder::memory();
31
32 for (key, values) in entries {
33 let index = values_vec.len() as u64;
34 values_vec.push(values);
35 builder.insert(key, index).with_context(|| "Failed to insert key-value pair into FST")?;
36 }
37
38 let fst_map_bytes = builder
39 .into_inner()
40 .with_context(|| "Failed to finalize FST construction")?;
41
42 let metadata = SerializableFstDict {
43 values: values_vec,
44 max_key_length,
45 };
46
47 let metadata_bytes = bincode::encode_to_vec(&metadata, config::standard())
48 .with_context(|| "Bincode metadata serialization failed")?;
49
50 let mut final_bytes = Vec::new();
51
52 final_bytes.write_all(&(metadata_bytes.len() as u64).to_le_bytes())?;
53 final_bytes.write_all(&metadata_bytes)?;
54 final_bytes.write_all(&fst_map_bytes)?;
55
56 Ok(final_bytes)
57}