Skip to main content

lindera_dictionary/builder/
unknown_dictionary.rs

1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{self, Write};
4use std::path::Path;
5
6use derive_builder::Builder;
7use log::debug;
8
9use crate::LinderaResult;
10use crate::dictionary::character_definition::CharacterDefinition;
11use crate::dictionary::unknown_dictionary::parse_unk;
12use crate::error::LinderaErrorKind;
13use crate::util::{read_file_with_encoding, write_data};
14
15#[derive(Builder, Debug)]
16#[builder(name = UnknownDictionaryBuilderOptions)]
17#[builder(build_fn(name = "builder"))]
18pub struct UnknownDictionaryBuilder {
19    #[builder(default = "\"UTF-8\".into()", setter(into))]
20    encoding: Cow<'static, str>,
21}
22
23impl UnknownDictionaryBuilder {
24    pub fn build(
25        &self,
26        input_dir: &Path,
27        chardef: &CharacterDefinition,
28        output_dir: &Path,
29    ) -> LinderaResult<()> {
30        let unk_data_path = input_dir.join("unk.def");
31        debug!("reading {unk_data_path:?}");
32        let unk_data = read_file_with_encoding(&unk_data_path, &self.encoding)?;
33        let unknown_dictionary = parse_unk(chardef.categories(), &unk_data)?;
34
35        let mut unk_buffer = Vec::new();
36        let bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&unknown_dictionary).map_err(|err| {
37            LinderaErrorKind::Serialize
38                .with_error(anyhow::anyhow!(err))
39                .add_context("Failed to serialize unknown dictionary data")
40        })?;
41        unk_buffer.write_all(&bytes).map_err(|err| {
42            LinderaErrorKind::Io
43                .with_error(anyhow::anyhow!(err))
44                .add_context("Failed to write unknown dictionary data to buffer")
45        })?;
46
47        let wtr_unk_path = output_dir.join(Path::new("unk.bin"));
48        let mut wtr_unk = io::BufWriter::new(
49            File::create(wtr_unk_path)
50                .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
51        );
52
53        write_data(&unk_buffer, &mut wtr_unk)?;
54
55        wtr_unk
56            .flush()
57            .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
58
59        Ok(())
60    }
61}