1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
use std::env;
use lindera_core::character_definition::CharacterDefinitions;
use lindera_core::connection::ConnectionCostMatrix;
use lindera_core::prefix_dict::PrefixDict;
use lindera_core::unknown_dictionary::UnknownDictionary;
use lindera_core::LinderaResult;
use lindera_decompress::decompress;
/// Declares a lazily initialised global named `$name` that holds the decoded
/// contents of an embedded dictionary file.
///
/// * `$name` - identifier of the generated global (`Lazy<Vec<u8>>`).
/// * `$bytes` - expression yielding the embedded bytes (indexable with `[..]`).
/// * `$filename` - file name literal, used only in the panic message.
///
/// On first access the bytes are deserialized with `bincode` and the result is
/// passed to `decompress`; the resulting `Vec<u8>` is stored in the `Lazy`.
///
/// The global is a `static`, not a `const`: a `const` item is inlined at every
/// use site, so each reference would construct a brand-new `Lazy` and repeat
/// the deserialization and decompression instead of reusing the first result.
///
/// # Panics
///
/// The first access panics with `"invalid file format <filename>"` if either
/// deserialization or decompression fails.
macro_rules! decompress_data {
    ($name: ident, $bytes: expr, $filename: literal) => {
        static $name: once_cell::sync::Lazy<Vec<u8>> = once_cell::sync::Lazy::new(|| {
            let compressed_data = bincode::deserialize_from(&$bytes[..])
                .expect(concat!("invalid file format ", $filename));
            decompress(compressed_data).expect(concat!("invalid file format ", $filename))
        });
    };
}
// Embedded dictionary payloads. Each invocation embeds one file from
// `$OUT_DIR/lindera-ipadic/` at compile time via `include_bytes!` and declares
// a lazily decoded `Vec<u8>` global for it. The files are expected to be
// produced by the build step before this module is compiled.

// Source bytes for `char_def()` (loaded by `CharacterDefinitions::load`).
decompress_data!(
CHAR_DEFINITION_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/char_def.bin")),
"char_def.bin"
);
// Source bytes for `connection()` (loaded by `ConnectionCostMatrix::load`).
decompress_data!(
CONNECTION_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/matrix.mtx")),
"matrix.mtx"
);
// First of the two inputs to `prefix_dict()`.
decompress_data!(
IPADIC_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/dict.da")),
"dict.da"
);
// Second of the two inputs to `prefix_dict()`.
decompress_data!(
IPADIC_VALS,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/dict.vals")),
"dict.vals"
);
// Source bytes for `unknown_dict()` (loaded by `UnknownDictionary::load`).
decompress_data!(
UNKNOWN_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/unk.bin")),
"unk.bin"
);
// Raw bytes returned by `words_idx_data()`.
decompress_data!(
WORDS_IDX_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/dict.wordsidx")),
"dict.wordsidx"
);
// Raw bytes returned by `words_data()`.
decompress_data!(
WORDS_DATA,
include_bytes!(concat!(env!("OUT_DIR"), "/lindera-ipadic/dict.words")),
"dict.words"
);
/// Loads the character definitions from the embedded `char_def.bin` data.
///
/// # Errors
///
/// Propagates any error returned by `CharacterDefinitions::load`.
///
/// # Panics
///
/// Panics with `"invalid file format char_def.bin"` if the embedded payload
/// cannot be deserialized or decompressed.
pub fn char_def() -> LinderaResult<CharacterDefinitions> {
    let raw = &CHAR_DEFINITION_DATA;
    CharacterDefinitions::load(raw)
}
/// Loads the connection cost matrix from the embedded `matrix.mtx` data.
///
/// # Panics
///
/// Panics with `"invalid file format matrix.mtx"` if the embedded payload
/// cannot be deserialized or decompressed.
pub fn connection() -> ConnectionCostMatrix {
    let raw = &CONNECTION_DATA;
    ConnectionCostMatrix::load(raw)
}
/// Builds the prefix dictionary from the embedded `dict.da` and `dict.vals`
/// data, passed to `PrefixDict::from_static_slice` in that order.
///
/// # Panics
///
/// Panics with `"invalid file format dict.da"` or
/// `"invalid file format dict.vals"` if the corresponding embedded payload
/// cannot be deserialized or decompressed.
pub fn prefix_dict() -> PrefixDict {
    let da_raw = &IPADIC_DATA;
    let vals_raw = &IPADIC_VALS;
    PrefixDict::from_static_slice(da_raw, vals_raw)
}
/// Loads the unknown-word dictionary from the embedded `unk.bin` data.
///
/// # Errors
///
/// Propagates any error returned by `UnknownDictionary::load`.
///
/// # Panics
///
/// Panics with `"invalid file format unk.bin"` if the embedded payload cannot
/// be deserialized or decompressed.
pub fn unknown_dict() -> LinderaResult<UnknownDictionary> {
    let raw = &UNKNOWN_DATA;
    UnknownDictionary::load(raw)
}
/// Returns an owned copy of the decoded `dict.wordsidx` bytes.
///
/// Every call allocates a new `Vec<u8>` containing the full payload.
///
/// # Panics
///
/// Panics with `"invalid file format dict.wordsidx"` if the embedded payload
/// cannot be deserialized or decompressed.
pub fn words_idx_data() -> Vec<u8> {
    let bytes: &[u8] = &WORDS_IDX_DATA;
    bytes.to_owned()
}
/// Returns an owned copy of the decoded `dict.words` bytes.
///
/// Every call allocates a new `Vec<u8>` containing the full payload.
///
/// # Panics
///
/// Panics with `"invalid file format dict.words"` if the embedded payload
/// cannot be deserialized or decompressed.
pub fn words_data() -> Vec<u8> {
    let bytes: &[u8] = &WORDS_DATA;
    bytes.to_owned()
}