1use std::borrow::Cow;
2#[cfg(feature = "ipadic")]
3use std::env;
4
5use lindera_core::{
6 character_definition::CharacterDefinitions, connection::ConnectionCostMatrix,
7 dictionary::Dictionary, prefix_dict::PrefixDict, unknown_dictionary::UnknownDictionary,
8 LinderaResult,
9};
10#[cfg(feature = "compress")]
11use lindera_decompress::decompress;
12
/// Declares the dictionary asset `$name`, backed by the embedded bytes `$bytes`.
///
/// * With the `compress` feature, `$name` is a lazily initialised `Vec<u8>`: on first
///   access the embedded bytes are deserialised with `bincode` and handed to `decompress`.
/// * Without the feature, `$name` is the embedded byte slice itself.
///
/// `$filename` only appears in the panic message raised when the embedded data cannot be
/// deserialised or decompressed.
macro_rules! decompress_data {
    ($name: ident, $bytes: expr, $filename: literal) => {
        // This has to be a `static`, not a `const`: a `const` is re-instantiated at every
        // use site, so each access would build a fresh `Lazy` and run the whole
        // deserialise + decompress step again instead of reusing the cached result.
        #[cfg(feature = "compress")]
        static $name: once_cell::sync::Lazy<Vec<u8>> = once_cell::sync::Lazy::new(|| {
            let compressed_data = bincode::deserialize_from(&$bytes[..])
                .expect(concat!("invalid file format ", $filename));
            decompress(compressed_data).expect(concat!("invalid file format ", $filename))
        });
        #[cfg(not(feature = "compress"))]
        const $name: &'static [u8] = $bytes;
    };
}
25
26#[cfg(feature = "ipadic")]
27decompress_data!(
28 CHAR_DEFINITION_DATA,
29 include_bytes!(concat!(
30 env!("LINDERA_WORKDIR"),
31 "/lindera-ipadic/char_def.bin"
32 )),
33 "char_def.bin"
34);
35#[cfg(not(feature = "ipadic"))]
36decompress_data!(CHAR_DEFINITION_DATA, &[], "char_def.bin");
37
38#[cfg(feature = "ipadic")]
39decompress_data!(
40 CONNECTION_DATA,
41 include_bytes!(concat!(
42 env!("LINDERA_WORKDIR"),
43 "/lindera-ipadic/matrix.mtx"
44 )),
45 "matrix.mtx"
46);
47#[cfg(not(feature = "ipadic"))]
48decompress_data!(CONNECTION_DATA, &[], "matrix.mtx");
49
50#[cfg(feature = "ipadic")]
51decompress_data!(
52 IPADIC_DATA,
53 include_bytes!(concat!(env!("LINDERA_WORKDIR"), "/lindera-ipadic/dict.da")),
54 "dict.da"
55);
56#[cfg(not(feature = "ipadic"))]
57decompress_data!(IPADIC_DATA, &[], "dict.da");
58
59#[cfg(feature = "ipadic")]
60decompress_data!(
61 IPADIC_VALS,
62 include_bytes!(concat!(
63 env!("LINDERA_WORKDIR"),
64 "/lindera-ipadic/dict.vals"
65 )),
66 "dict.vals"
67);
68#[cfg(not(feature = "ipadic"))]
69decompress_data!(IPADIC_VALS, &[], "dict.vals");
70
71#[cfg(feature = "ipadic")]
72decompress_data!(
73 UNKNOWN_DATA,
74 include_bytes!(concat!(env!("LINDERA_WORKDIR"), "/lindera-ipadic/unk.bin")),
75 "unk.bin"
76);
77#[cfg(not(feature = "ipadic"))]
78decompress_data!(UNKNOWN_DATA, &[], "unk.bin");
79
80#[cfg(feature = "ipadic")]
81decompress_data!(
82 WORDS_IDX_DATA,
83 include_bytes!(concat!(
84 env!("LINDERA_WORKDIR"),
85 "/lindera-ipadic/dict.wordsidx"
86 )),
87 "dict.wordsidx"
88);
89#[cfg(not(feature = "ipadic"))]
90decompress_data!(WORDS_IDX_DATA, &[], "dict.wordsidx");
91
92#[cfg(feature = "ipadic")]
93decompress_data!(
94 WORDS_DATA,
95 include_bytes!(concat!(
96 env!("LINDERA_WORKDIR"),
97 "/lindera-ipadic/dict.words"
98 )),
99 "dict.words"
100);
101#[cfg(not(feature = "ipadic"))]
102decompress_data!(WORDS_DATA, &[], "dict.words");
103
104pub fn load_dictionary() -> LinderaResult<Dictionary> {
105 Ok(Dictionary {
106 dict: prefix_dict(),
107 cost_matrix: connection(),
108 char_definitions: char_def()?,
109 unknown_dictionary: unknown_dict()?,
110 words_idx_data: words_idx_data(),
111 words_data: words_data(),
112 })
113}
114
115pub fn char_def() -> LinderaResult<CharacterDefinitions> {
116 #[allow(clippy::needless_borrow)]
117 CharacterDefinitions::load(&CHAR_DEFINITION_DATA)
118}
119
120pub fn connection() -> ConnectionCostMatrix {
121 #[cfg(feature = "compress")]
122 {
123 ConnectionCostMatrix::load(&CONNECTION_DATA)
124 }
125 #[cfg(not(feature = "compress"))]
126 {
127 ConnectionCostMatrix::load_static(CONNECTION_DATA)
128 }
129}
130
131pub fn prefix_dict() -> PrefixDict {
132 #[allow(clippy::needless_borrow)]
133 PrefixDict::from_static_slice(&IPADIC_DATA, &IPADIC_VALS)
134}
135
136pub fn unknown_dict() -> LinderaResult<UnknownDictionary> {
137 #[allow(clippy::needless_borrow)]
138 UnknownDictionary::load(&UNKNOWN_DATA)
139}
140
141pub fn words_idx_data() -> Cow<'static, [u8]> {
142 #[cfg(feature = "compress")]
143 {
144 Cow::Owned(WORDS_IDX_DATA.to_vec())
145 }
146 #[cfg(not(feature = "compress"))]
147 {
148 Cow::Borrowed(WORDS_IDX_DATA)
149 }
150}
151
152pub fn words_data() -> Cow<'static, [u8]> {
153 #[cfg(feature = "compress")]
154 {
155 Cow::Owned(WORDS_DATA.to_vec())
156 }
157 #[cfg(not(feature = "compress"))]
158 {
159 Cow::Borrowed(WORDS_DATA)
160 }
161}