lindera_dictionary/dictionary_builder/
connection_cost_matrix.rsuse std::borrow::Cow;
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use std::str::FromStr;
use byteorder::{LittleEndian, WriteBytesExt};
use derive_builder::Builder;
use log::debug;
use crate::decompress::Algorithm;
use crate::error::LinderaErrorKind;
use crate::util::{compress_write, read_file_with_encoding};
use crate::LinderaResult;
#[derive(Builder, Debug)]
#[builder(name = ConnectionCostMatrixBuilderOptions)]
#[builder(build_fn(name = "builder"))]
pub struct ConnectionCostMatrixBuilder {
#[builder(default = "\"UTF-8\".into()", setter(into))]
encoding: Cow<'static, str>,
#[builder(default = "Algorithm::Deflate")]
compress_algorithm: Algorithm,
}
impl ConnectionCostMatrixBuilder {
pub fn build(&self, input_dir: &Path, output_dir: &Path) -> LinderaResult<()> {
let matrix_data_path = input_dir.join("matrix.def");
debug!("reading {:?}", matrix_data_path);
let matrix_data = read_file_with_encoding(&matrix_data_path, &self.encoding)?;
let mut lines = Vec::new();
for line in matrix_data.lines() {
let fields: Vec<i32> = line
.split_whitespace()
.map(i32::from_str)
.collect::<Result<_, _>>()
.map_err(|err| LinderaErrorKind::Parse.with_error(anyhow::anyhow!(err)))?;
lines.push(fields);
}
let mut lines_it = lines.into_iter();
let header = lines_it.next().ok_or_else(|| {
LinderaErrorKind::Content.with_error(anyhow::anyhow!("unknown error"))
})?;
let forward_size = header[0] as u32;
let backward_size = header[1] as u32;
let len = 2 + (forward_size * backward_size) as usize;
let mut costs = vec![i16::MAX; len];
costs[0] = forward_size as i16;
costs[1] = backward_size as i16;
for fields in lines_it {
let forward_id = fields[0] as u32;
let backward_id = fields[1] as u32;
let cost = fields[2] as u16;
costs[2 + (backward_id + forward_id * backward_size) as usize] = cost as i16;
}
let wtr_matrix_mtx_path = output_dir.join(Path::new("matrix.mtx"));
let mut wtr_matrix_mtx = io::BufWriter::new(
File::create(wtr_matrix_mtx_path)
.map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?,
);
let mut matrix_mtx_buffer = Vec::new();
for cost in costs {
matrix_mtx_buffer
.write_i16::<LittleEndian>(cost)
.map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
}
compress_write(
&matrix_mtx_buffer,
self.compress_algorithm,
&mut wtr_matrix_mtx,
)?;
wtr_matrix_mtx
.flush()
.map_err(|err| LinderaErrorKind::Io.with_error(anyhow::anyhow!(err)))?;
Ok(())
}
}