lindera_dictionary/dictionary_builder/
unknown_dictionary.rs

1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{self, Write};
4use std::path::Path;
5
6use derive_builder::Builder;
7use log::debug;
8
9use crate::decompress::Algorithm;
10use crate::dictionary::character_definition::CharacterDefinition;
11use crate::dictionary::unknown_dictionary::parse_unk;
12use crate::error::LinderaErrorKind;
13use crate::util::{compress_write, read_file_with_encoding};
14use crate::LinderaResult;
15
16#[derive(Builder, Debug)]
17#[builder(name = UnknownDictionaryBuilderOptions)]
18#[builder(build_fn(name = "builder"))]
19pub struct UnknownDictionaryBuilder {
20    #[builder(default = "\"UTF-8\".into()", setter(into))]
21    encoding: Cow<'static, str>,
22    #[builder(default = "Algorithm::Deflate")]
23    compress_algorithm: Algorithm,
24    #[builder(default = "11")]
25    unk_fields_num: usize,
26}
27
28impl UnknownDictionaryBuilder {
29    pub fn build(
30        &self,
31        input_dir: &Path,
32        chardef: &CharacterDefinition,
33        output_dir: &Path,
34    ) -> LinderaResult<()> {
35        let unk_data_path = input_dir.join("unk.def");
36        debug!("reading {:?}", unk_data_path);
37        let unk_data = read_file_with_encoding(&unk_data_path, &self.encoding)?;
38        let unknown_dictionary = parse_unk(chardef.categories(), &unk_data, self.unk_fields_num)?;
39
40        let mut unk_buffer = Vec::new();
41        bincode::serialize_into(&mut unk_buffer, &unknown_dictionary)
42            .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))?;
43
44        let wtr_unk_path = output_dir.join(Path::new("unk.bin"));
45        let mut wtr_unk = io::BufWriter::new(
46            File::create(wtr_unk_path)
47                .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
48        );
49
50        compress_write(&unk_buffer, self.compress_algorithm, &mut wtr_unk)?;
51
52        wtr_unk
53            .flush()
54            .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
55
56        Ok(())
57    }
58}