lindera_ipadic/
lib.rs

1use std::borrow::Cow;
2#[cfg(feature = "ipadic")]
3use std::env;
4
5use lindera_core::{
6    character_definition::CharacterDefinitions, connection::ConnectionCostMatrix,
7    dictionary::Dictionary, prefix_dict::PrefixDict, unknown_dictionary::UnknownDictionary,
8    LinderaResult,
9};
10#[cfg(feature = "compress")]
11use lindera_decompress::decompress;
12
/// Declares the item `$name` holding the raw bytes of one dictionary component.
///
/// * `$name` - identifier of the item to declare.
/// * `$bytes` - expression yielding the embedded bytes (a byte-slice reference).
/// * `$filename` - file name literal, used only in the panic message.
///
/// With the `compress` feature, `$name` is a lazily initialised `Vec<u8>`: on first access
/// `$bytes` is deserialized with `bincode` and passed to `decompress` (which must be in scope
/// where the macro is invoked). Either step failing panics with
/// `"invalid file format <filename>"`. Without the feature, `$name` is simply `$bytes` as a
/// `&'static [u8]`.
macro_rules! decompress_data {
    ($name: ident, $bytes: expr, $filename: literal) => {
        // This must be a `static`, not a `const`: a `const` is inlined at every use site, so
        // each mention would construct a brand-new `Lazy` and decompress the data again
        // instead of reusing the cached result.
        #[cfg(feature = "compress")]
        static $name: once_cell::sync::Lazy<Vec<u8>> = once_cell::sync::Lazy::new(|| {
            let compressed_data = bincode::deserialize_from(&$bytes[..])
                .expect(concat!("invalid file format ", $filename));
            decompress(compressed_data).expect(concat!("invalid file format ", $filename))
        });
        #[cfg(not(feature = "compress"))]
        const $name: &'static [u8] = $bytes;
    };
}
25
26#[cfg(feature = "ipadic")]
27decompress_data!(
28    CHAR_DEFINITION_DATA,
29    include_bytes!(concat!(
30        env!("LINDERA_WORKDIR"),
31        "/lindera-ipadic/char_def.bin"
32    )),
33    "char_def.bin"
34);
35#[cfg(not(feature = "ipadic"))]
36decompress_data!(CHAR_DEFINITION_DATA, &[], "char_def.bin");
37
38#[cfg(feature = "ipadic")]
39decompress_data!(
40    CONNECTION_DATA,
41    include_bytes!(concat!(
42        env!("LINDERA_WORKDIR"),
43        "/lindera-ipadic/matrix.mtx"
44    )),
45    "matrix.mtx"
46);
47#[cfg(not(feature = "ipadic"))]
48decompress_data!(CONNECTION_DATA, &[], "matrix.mtx");
49
50#[cfg(feature = "ipadic")]
51decompress_data!(
52    IPADIC_DATA,
53    include_bytes!(concat!(env!("LINDERA_WORKDIR"), "/lindera-ipadic/dict.da")),
54    "dict.da"
55);
56#[cfg(not(feature = "ipadic"))]
57decompress_data!(IPADIC_DATA, &[], "dict.da");
58
59#[cfg(feature = "ipadic")]
60decompress_data!(
61    IPADIC_VALS,
62    include_bytes!(concat!(
63        env!("LINDERA_WORKDIR"),
64        "/lindera-ipadic/dict.vals"
65    )),
66    "dict.vals"
67);
68#[cfg(not(feature = "ipadic"))]
69decompress_data!(IPADIC_VALS, &[], "dict.vals");
70
71#[cfg(feature = "ipadic")]
72decompress_data!(
73    UNKNOWN_DATA,
74    include_bytes!(concat!(env!("LINDERA_WORKDIR"), "/lindera-ipadic/unk.bin")),
75    "unk.bin"
76);
77#[cfg(not(feature = "ipadic"))]
78decompress_data!(UNKNOWN_DATA, &[], "unk.bin");
79
80#[cfg(feature = "ipadic")]
81decompress_data!(
82    WORDS_IDX_DATA,
83    include_bytes!(concat!(
84        env!("LINDERA_WORKDIR"),
85        "/lindera-ipadic/dict.wordsidx"
86    )),
87    "dict.wordsidx"
88);
89#[cfg(not(feature = "ipadic"))]
90decompress_data!(WORDS_IDX_DATA, &[], "dict.wordsidx");
91
92#[cfg(feature = "ipadic")]
93decompress_data!(
94    WORDS_DATA,
95    include_bytes!(concat!(
96        env!("LINDERA_WORKDIR"),
97        "/lindera-ipadic/dict.words"
98    )),
99    "dict.words"
100);
101#[cfg(not(feature = "ipadic"))]
102decompress_data!(WORDS_DATA, &[], "dict.words");
103
104pub fn load_dictionary() -> LinderaResult<Dictionary> {
105    Ok(Dictionary {
106        dict: prefix_dict(),
107        cost_matrix: connection(),
108        char_definitions: char_def()?,
109        unknown_dictionary: unknown_dict()?,
110        words_idx_data: words_idx_data(),
111        words_data: words_data(),
112    })
113}
114
115pub fn char_def() -> LinderaResult<CharacterDefinitions> {
116    #[allow(clippy::needless_borrow)]
117    CharacterDefinitions::load(&CHAR_DEFINITION_DATA)
118}
119
120pub fn connection() -> ConnectionCostMatrix {
121    #[cfg(feature = "compress")]
122    {
123        ConnectionCostMatrix::load(&CONNECTION_DATA)
124    }
125    #[cfg(not(feature = "compress"))]
126    {
127        ConnectionCostMatrix::load_static(CONNECTION_DATA)
128    }
129}
130
131pub fn prefix_dict() -> PrefixDict {
132    #[allow(clippy::needless_borrow)]
133    PrefixDict::from_static_slice(&IPADIC_DATA, &IPADIC_VALS)
134}
135
136pub fn unknown_dict() -> LinderaResult<UnknownDictionary> {
137    #[allow(clippy::needless_borrow)]
138    UnknownDictionary::load(&UNKNOWN_DATA)
139}
140
141pub fn words_idx_data() -> Cow<'static, [u8]> {
142    #[cfg(feature = "compress")]
143    {
144        Cow::Owned(WORDS_IDX_DATA.to_vec())
145    }
146    #[cfg(not(feature = "compress"))]
147    {
148        Cow::Borrowed(WORDS_IDX_DATA)
149    }
150}
151
152pub fn words_data() -> Cow<'static, [u8]> {
153    #[cfg(feature = "compress")]
154    {
155        Cow::Owned(WORDS_DATA.to_vec())
156    }
157    #[cfg(not(feature = "compress"))]
158    {
159        Cow::Borrowed(WORDS_DATA)
160    }
161}