// ferrous_opencc/compiler_logic.rs

1#[derive(Encode, Decode, Debug)]
2pub struct SerializableFstDict {
3    pub values: Vec<Vec<Arc<str>>>,
4    pub max_key_length: usize,
5}
6
7pub fn compile_dictionary(input_path: &Path) -> Result<Vec<u8>> {
8    let file = File::open(input_path)
9        .with_context(|| format!("Failed to open input dictionary: {}", input_path.display()))?;
10    let reader = BufReader::new(file);
11
12    let mut entries = BTreeMap::new();
13    let mut max_key_length = 0;
14
15    for line in reader.lines() {
16        let line = line.with_context(|| "Failed to read line from dictionary")?;
17        let parts: Vec<&str> = line.split('\t').collect();
18        if parts.len() == 2 {
19            let key = parts[0];
20            let values: Vec<Arc<str>> = parts[1].split(' ').map(|s| s.into()).collect();
21
22            if !key.is_empty() && !values.is_empty() && !values.iter().any(|s| s.is_empty()) {
23                max_key_length = max_key_length.max(key.chars().count());
24                entries.insert(key.to_string(), values);
25            }
26        }
27    }
28
29    let mut values_vec = Vec::with_capacity(entries.len());
30    let mut builder = MapBuilder::memory();
31
32    for (key, values) in entries {
33        let index = values_vec.len() as u64;
34        values_vec.push(values);
35        builder.insert(key, index).with_context(|| "Failed to insert key-value pair into FST")?;
36    }
37
38    let fst_map_bytes = builder
39        .into_inner()
40        .with_context(|| "Failed to finalize FST construction")?;
41
42    let metadata = SerializableFstDict {
43        values: values_vec,
44        max_key_length,
45    };
46
47    let metadata_bytes = bincode::encode_to_vec(&metadata, config::standard())
48        .with_context(|| "Bincode metadata serialization failed")?;
49
50    let mut final_bytes = Vec::new();
51
52    final_bytes.write_all(&(metadata_bytes.len() as u64).to_le_bytes())?;
53    final_bytes.write_all(&metadata_bytes)?;
54    final_bytes.write_all(&fst_map_bytes)?;
55
56    Ok(final_bytes)
57}