lindera_dictionary_builder/
unk.rs

1use std::borrow::Cow;
2use std::fs::File;
3use std::io;
4use std::io::Write;
5use std::path::Path;
6
7use derive_builder::Builder;
8use lindera_core::character_definition::CharacterDefinitions;
9use lindera_core::error::LinderaErrorKind;
10use lindera_core::unknown_dictionary::parse_unk;
11use lindera_core::LinderaResult;
12use lindera_decompress::Algorithm;
13use log::debug;
14
15use crate::utils::{compress_write, read_file_with_encoding};
16
17#[derive(Builder, Debug)]
18#[builder(name = "UnkBuilderOptions")]
19#[builder(build_fn(name = "builder"))]
20pub struct UnkBuilder {
21    #[builder(default = "\"UTF-8\".into()", setter(into))]
22    encoding: Cow<'static, str>,
23    #[builder(default = "Algorithm::Deflate")]
24    compress_algorithm: Algorithm,
25    #[builder(default = "11")]
26    unk_fields_num: usize,
27}
28
29impl UnkBuilder {
30    pub fn build(
31        &self,
32        input_dir: &Path,
33        chardef: &CharacterDefinitions,
34        output_dir: &Path,
35    ) -> LinderaResult<()> {
36        let unk_data_path = input_dir.join("unk.def");
37        debug!("reading {:?}", unk_data_path);
38        let unk_data = read_file_with_encoding(&unk_data_path, &self.encoding)?;
39        let unknown_dictionary = parse_unk(chardef.categories(), &unk_data, self.unk_fields_num)?;
40
41        let mut unk_buffer = Vec::new();
42        bincode::serialize_into(&mut unk_buffer, &unknown_dictionary)
43            .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))?;
44
45        let wtr_unk_path = output_dir.join(Path::new("unk.bin"));
46        let mut wtr_unk = io::BufWriter::new(
47            File::create(wtr_unk_path)
48                .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
49        );
50
51        compress_write(&unk_buffer, self.compress_algorithm, &mut wtr_unk)?;
52
53        wtr_unk
54            .flush()
55            .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
56
57        Ok(())
58    }
59}