lindera_dictionary/builder/
unknown_dictionary.rs1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{self, Write};
4use std::path::Path;
5
6use derive_builder::Builder;
7use log::debug;
8
9use crate::LinderaResult;
10use crate::decompress::Algorithm;
11use crate::dictionary::character_definition::CharacterDefinition;
12use crate::dictionary::unknown_dictionary::parse_unk;
13use crate::error::LinderaErrorKind;
14use crate::util::{compress_write, read_file_with_encoding};
15
16#[derive(Builder, Debug)]
17#[builder(name = UnknownDictionaryBuilderOptions)]
18#[builder(build_fn(name = "builder"))]
19pub struct UnknownDictionaryBuilder {
20 #[builder(default = "\"UTF-8\".into()", setter(into))]
21 encoding: Cow<'static, str>,
22 #[builder(default = "Algorithm::Deflate")]
23 compress_algorithm: Algorithm,
24}
25
26impl UnknownDictionaryBuilder {
27 pub fn build(
28 &self,
29 input_dir: &Path,
30 chardef: &CharacterDefinition,
31 output_dir: &Path,
32 ) -> LinderaResult<()> {
33 let unk_data_path = input_dir.join("unk.def");
34 debug!("reading {unk_data_path:?}");
35 let unk_data = read_file_with_encoding(&unk_data_path, &self.encoding)?;
36 let unknown_dictionary = parse_unk(chardef.categories(), &unk_data)?;
37
38 let mut unk_buffer = Vec::new();
39 bincode::serde::encode_into_std_write(
40 &unknown_dictionary,
41 &mut unk_buffer,
42 bincode::config::legacy(),
43 )
44 .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))?;
45
46 let wtr_unk_path = output_dir.join(Path::new("unk.bin"));
47 let mut wtr_unk = io::BufWriter::new(
48 File::create(wtr_unk_path)
49 .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
50 );
51
52 compress_write(&unk_buffer, self.compress_algorithm, &mut wtr_unk)?;
53
54 wtr_unk
55 .flush()
56 .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
57
58 Ok(())
59 }
60}