jpreprocess_dictionary/dictionary/
word_encoding.rs1use byteorder::{ByteOrder, LittleEndian};
2use jpreprocess_core::word_line::WordDetailsLine;
3use lindera_dictionary::{error::LinderaErrorKind, LinderaResult};
4
5pub trait DictionaryWordEncoding: Sized {
7 fn identifier() -> &'static str;
8 fn encode(row: WordDetailsLine) -> LinderaResult<Vec<u8>>;
9 fn decode(string: String, details: &[u8]) -> LinderaResult<Vec<String>>;
10}
11
12pub struct JPreprocessDictionaryWordEncoding;
13impl JPreprocessDictionaryWordEncoding {
14 pub fn serialize(
15 data: &jpreprocess_core::word_entry::WordEntry,
16 ) -> Result<Vec<u8>, bincode::error::EncodeError> {
17 bincode::serde::encode_to_vec(data, Self::bincode_option())
18 }
19 pub fn deserialize(
20 data: &[u8],
21 ) -> Result<jpreprocess_core::word_entry::WordEntry, bincode::error::DecodeError> {
22 let (decoded, _size) = bincode::serde::decode_from_slice(data, Self::bincode_option())?;
23 Ok(decoded)
24 }
25
26 fn bincode_option() -> bincode::config::Configuration {
27 bincode::config::standard()
28 .with_no_limit()
29 .with_little_endian()
30 }
31}
32impl DictionaryWordEncoding for JPreprocessDictionaryWordEncoding {
33 fn identifier() -> &'static str {
34 concat!("jpreprocess ", env!("CARGO_PKG_VERSION"))
35 }
36
37 fn encode(row: WordDetailsLine) -> LinderaResult<Vec<u8>> {
38 let data = row
39 .try_into()
40 .map_err(|err| LinderaErrorKind::Serialize.with_error(err))?;
41 Self::serialize(&data).map_err(|err| LinderaErrorKind::Serialize.with_error(err))
42 }
43
44 fn decode(string: String, data: &[u8]) -> LinderaResult<Vec<String>> {
45 let word_details: jpreprocess_core::word_entry::WordEntry =
46 Self::deserialize(data).map_err(|err| LinderaErrorKind::Deserialize.with_error(err))?;
47 Ok(word_details.to_str_vec(string).to_vec())
48 }
49}
50
51pub struct LinderaSystemDictionaryWordEncoding;
52impl DictionaryWordEncoding for LinderaSystemDictionaryWordEncoding {
53 fn identifier() -> &'static str {
54 unimplemented!("JPreprocess does not support building in Lindera dictionary format")
55 }
56
57 fn encode(_row: WordDetailsLine) -> LinderaResult<Vec<u8>> {
58 unimplemented!("JPreprocess does not support building in Lindera dictionary format")
59 }
60
61 fn decode(_string: String, data: &[u8]) -> LinderaResult<Vec<String>> {
62 let len = LittleEndian::read_u32(data) as usize;
63 let data = &data[4..4 + len];
64
65 let mut details = Vec::new();
66 for bytes in data.split(|&b| b == 0) {
67 let detail = match std::str::from_utf8(bytes) {
68 Ok(s) => s,
69 Err(err) => return Err(LinderaErrorKind::Deserialize.with_error(err)),
70 };
71 details.push(detail.to_string());
72 }
73 Ok(details)
74 }
75}
76
77pub struct LinderaUserDictionaryWordEncoding;
78impl DictionaryWordEncoding for LinderaUserDictionaryWordEncoding {
79 fn identifier() -> &'static str {
80 unimplemented!("JPreprocess does not support building in Lindera dictionary format")
81 }
82
83 fn encode(_row: WordDetailsLine) -> LinderaResult<Vec<u8>> {
84 unimplemented!("JPreprocess does not support building in Lindera dictionary format")
85 }
86
87 fn decode(_string: String, data: &[u8]) -> LinderaResult<Vec<String>> {
88 LinderaSystemDictionaryWordEncoding::decode(_string, data)
89 }
90}