lindera_dictionary/dictionary/
connection_cost_matrix.rs

1use crate::util::Data;
2
3use byteorder::{ByteOrder, LittleEndian};
4use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
5
6#[derive(Clone, Archive, RkyvSerialize, RkyvDeserialize)]
7pub struct ConnectionCostMatrix {
8    /// The connection cost matrix data.
9    /// Previously, this was `Data` (byte array) and costs were read using `LittleEndian::read_i16` at runtime.
10    /// Changed to `Vec<i16>` to enable direct array indexing and avoid deserialization overhead during tokenization.
11    pub costs_data: Vec<i16>,
12    pub backward_size: u32,
13}
14
15impl ConnectionCostMatrix {
16    pub fn load(conn_data: impl Into<Data>) -> ConnectionCostMatrix {
17        let conn_data = conn_data.into();
18        let backward_size = LittleEndian::read_i16(&conn_data[2..4]);
19        let size = conn_data.len() / 2 - 2;
20        let mut costs_data = vec![0i16; size];
21        LittleEndian::read_i16_into(&conn_data[4..], &mut costs_data);
22
23        ConnectionCostMatrix {
24            costs_data,
25            backward_size: backward_size as u32,
26        }
27    }
28
29    pub fn cost(&self, forward_id: u32, backward_id: u32) -> i32 {
30        let cost_id = (backward_id + forward_id * self.backward_size) as usize;
31        self.costs_data[cost_id] as i32
32    }
33}
34
35impl ArchivedConnectionCostMatrix {
36    pub fn cost(&self, forward_id: u32, backward_id: u32) -> i32 {
37        let cost_id = (backward_id + forward_id * self.backward_size) as usize;
38        self.costs_data[cost_id].to_native() as i32
39    }
40}