1use crate::core::{bulk_load_u32, CODE_ESCAPE, U64_SIZE};
2use crate::core::symbol::Symbol;
3use crate::core::symbol_table::SymbolTable;
4use crate::util::endian::Endian;
5
/// Compresses strings into code streams using a borrowed symbol table.
pub struct Encoder<'a> {
    /// Table consulted for every encoding step; the encoder only borrows it,
    /// so the table must outlive the encoder.
    symbol_table: &'a Box<dyn SymbolTable>,
}
9
10impl Encoder<'_> {
11 pub fn from_table(table: &Box<dyn SymbolTable>) -> Encoder {
12 Encoder { symbol_table: table }
13 }
14
15 pub fn encode_str(&self, str: &str) -> Vec<u8> {
16 let mut buf = vec![0; str.len() << 1];
17 let (mut pos_in, mut pos_out) = (0, 0);
18 while pos_in < str.len() {
19 let target = Symbol::from_str(&str[pos_in..]);
20 buf[pos_out + 1] = target.first() as u8;
21 let (code, s_len, out_len) = self.symbol_table.encode_for(&target);
22 buf[pos_out] = code;
23 pos_out += out_len;
24 pos_in += s_len;
25 }
26 buf.truncate(pos_out);
27 buf
28 }
29
30 pub fn encode(&self, str: &str, include_table: bool) -> Vec<u8> {
31 let mut buf = self.encode_str(str);
32 if include_table {
33 let mut table_buf = self.symbol_table.dump();
34 let mut buf_with_tab = Vec::with_capacity(table_buf.len() + table_buf.len());
35 buf_with_tab.append(&mut table_buf);
36 buf_with_tab.append(&mut buf);
37 buf_with_tab
38 } else {
39 buf
40 }
41 }
42}
43
/// Decodes code streams using flat, code-indexed lookup arrays.
pub struct Decoder {
    /// Symbol bytes packed into a `u64`, indexed by code.
    symbols: [u64; CODE_ESCAPE as usize],
    /// Symbol length for each code; entries never filled in stay 0.
    lens: [u8; CODE_ESCAPE as usize],
}
48
49impl Decoder {
50 pub fn from_table(table: &Box<dyn SymbolTable>) -> Decoder {
51 let mut symbols = [0u64; CODE_ESCAPE as usize];
52 let mut lens = [0u8; CODE_ESCAPE as usize];
53 for i in 0..table.len() {
54 let s = table.get_symbol(i as u16);
55 symbols[i] = s.as_u64();
56 lens[i] = s.length() as u8;
57 }
58 Decoder { symbols, lens }
59 }
60
61 pub fn from_table_bytes(buf: &Vec<u8>) -> (usize, Decoder) {
62 let mut symbols = [0u64; CODE_ESCAPE as usize];
63 let mut lens = [0u8; CODE_ESCAPE as usize];
64 let encode_endian = Endian::from_u8(*buf.get(0).unwrap());
65 let len_histo = &buf[1..9];
66 let (mut pos, mut code) = (9, 0usize);
67 for len in 1..=Symbol::MAX_LEN {
68 for _ in 0..len_histo[len - 1] {
69 let mut num = 0u64;
70 if Endian::get_native_endian() != encode_endian {
71 num |= *buf.get(pos).unwrap() as u64;
72 for i in 1..len {
73 num <<= 8;
74 num |= *buf.get(pos + i).unwrap() as u64;
75 }
76 } else {
77 num |= *buf.get(pos + len - 1).unwrap() as u64;
78 for i in (0..len - 1).rev() {
79 num <<= 8;
80 num |= *buf.get(pos + i).unwrap() as u64;
81 }
82 }
83 symbols[code] = num;
84 lens[code] = len as u8;
85 code += 1;
86 pos += len;
87 }
88 }
89 (pos, Decoder { symbols, lens })
90 }
91
92 pub fn decode_with_tab(table: &Box<dyn SymbolTable>, buf: &Vec<u8>) -> String {
94 let mut str = String::with_capacity(buf.len() * 4);
95 let mut pos = 0;
96 while pos < buf.len() {
97 let b = buf.get(pos).unwrap();
98 pos += 1;
99 if *b == 255 {
100 str.push(*buf.get(pos).unwrap() as char);
101 pos += 1;
102 } else {
103 str.push_str(&table.get_symbol(*b as u16).to_string());
104 }
105 }
106 str
107 }
108
109 pub fn decode(&self, str_buf: &Vec<u8>) -> String {
111 let (mut pos_in, mut pos_out) = (0, 0);
112 let mut decode_buf = vec![0u8; str_buf.len() * Symbol::MAX_LEN];
113 unsafe {
114 let out = decode_buf.as_mut_ptr();
115 while pos_in + 4 < str_buf.len() {
116 let next_block = bulk_load_u32(&str_buf[pos_in..pos_in + 4]);
117 let escape_mask = (next_block & 0x80808080) & ((((!next_block) & 0x7F7F7F7F) + 0x7F7F7F7F) ^ 0x80808080);
118 if escape_mask == 0 {
119 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
120 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
121 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
122 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
123 } else {
124 let mut first_escape_pos = escape_mask.trailing_zeros() >> 3;
125 while first_escape_pos > 0 {
126 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
127 first_escape_pos -= 1;
128 }
129 decode_buf[pos_out] = str_buf[pos_in + 1];
130 pos_in += 2;
131 pos_out += 1;
132 }
133 }
134 while pos_in < str_buf.len() {
135 if str_buf[pos_in] != CODE_ESCAPE {
136 self.unaligned_store(&mut pos_in, &mut pos_out, &str_buf, out);
137 } else {
138 decode_buf[pos_out] = str_buf[pos_in + 1];
139 pos_in += 2;
140 pos_out += 1;
141 }
142 }
143 decode_buf.truncate(pos_out);
144 String::from_utf8_unchecked(decode_buf)
145 }
146 }
147
148 #[inline(always)]
149 unsafe fn unaligned_store(&self, pos_in: &mut usize, pos_out: &mut usize, str_in: &Vec<u8>, out: *mut u8) {
150 let code = str_in[*pos_in] as usize;
151 std::ptr::copy_nonoverlapping(self.symbols[code].to_ne_bytes().as_ptr(), out.add(*pos_out), U64_SIZE);
152 *pos_in += 1;
153 *pos_out += self.lens[code] as usize;
154 }
155}
156
#[cfg(test)]
mod test {
    use crate::core::codec::{Decoder, Encoder};
    use crate::core::symbol_table::SymbolTableBuilder;

    /// Round-trips a string through an encoder and a decoder that was rebuilt
    /// from the dumped table bytes.
    #[test]
    pub fn test_decode_with_dump_table() {
        let input = "paqvawflxucgajxfzxwooypirnzkahobfvxzhrerdwzkerwwolqfbafwslwhsvuitbtgkvnjrdr";
        let table = SymbolTableBuilder::build_from(input);

        // Rebuilding the decoder must consume the whole dumped table.
        let dumped = table.dump();
        let (consumed, decoder) = Decoder::from_table_bytes(&dumped);
        assert_eq!(dumped.len(), consumed);

        // Encode without the table prefix, then decode with the rebuilt decoder.
        let encoder = Encoder::from_table(&table);
        let compressed = encoder.encode(input, false);
        let restored = decoder.decode(&compressed);
        assert_eq!(input, restored);
    }
}