lindera_dictionary/builder/
connection_cost_matrix.rs1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{self, Write};
4use std::path::Path;
5use std::str::FromStr;
6
7use byteorder::{LittleEndian, WriteBytesExt};
8use derive_builder::Builder;
9use log::debug;
10
11use crate::LinderaResult;
12use crate::decompress::Algorithm;
13use crate::error::LinderaErrorKind;
14use crate::util::{compress_write, read_file_with_encoding};
15
16#[derive(Builder, Debug)]
17#[builder(name = ConnectionCostMatrixBuilderOptions)]
18#[builder(build_fn(name = "builder"))]
19pub struct ConnectionCostMatrixBuilder {
20 #[builder(default = "\"UTF-8\".into()", setter(into))]
21 encoding: Cow<'static, str>,
22 #[builder(default = "Algorithm::Deflate")]
23 compress_algorithm: Algorithm,
24}
25
26impl ConnectionCostMatrixBuilder {
27 pub fn build(&self, input_dir: &Path, output_dir: &Path) -> LinderaResult<()> {
28 let matrix_data_path = input_dir.join("matrix.def");
29 debug!("reading {matrix_data_path:?}");
30 let matrix_data = read_file_with_encoding(&matrix_data_path, &self.encoding)?;
31
32 let mut lines = Vec::new();
33 for line in matrix_data.lines() {
34 let fields: Vec<i32> = line
35 .split_whitespace()
36 .map(i32::from_str)
37 .collect::<Result<_, _>>()
38 .map_err(|err| LinderaErrorKind::Parse.with_error(anyhow::anyhow!(err)))?;
39 lines.push(fields);
40 }
41 let mut lines_it = lines.into_iter();
42 let header = lines_it.next().ok_or_else(|| {
43 LinderaErrorKind::Content.with_error(anyhow::anyhow!("unknown error"))
44 })?;
45 let forward_size = header[0] as u32;
46 let backward_size = header[1] as u32;
47 let len = 2 + (forward_size * backward_size) as usize;
48 let mut costs = vec![i16::MAX; len];
49 costs[0] = forward_size as i16;
50 costs[1] = backward_size as i16;
51 for fields in lines_it {
52 let forward_id = fields[0] as u32;
53 let backward_id = fields[1] as u32;
54 let cost = fields[2] as u16;
55 costs[2 + (backward_id + forward_id * backward_size) as usize] = cost as i16;
56 }
57
58 let wtr_matrix_mtx_path = output_dir.join(Path::new("matrix.mtx"));
59 let mut wtr_matrix_mtx = io::BufWriter::new(
60 File::create(wtr_matrix_mtx_path)
61 .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
62 );
63 let mut matrix_mtx_buffer = Vec::new();
64 for cost in costs {
65 matrix_mtx_buffer
66 .write_i16::<LittleEndian>(cost)
67 .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
68 }
69
70 compress_write(
71 &matrix_mtx_buffer,
72 self.compress_algorithm,
73 &mut wtr_matrix_mtx,
74 )?;
75
76 wtr_matrix_mtx
77 .flush()
78 .map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
79
80 Ok(())
81 }
82}