1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
pub mod delta;
pub mod packed_bool;
pub mod varint;
#[cfg(feature = "write")]
use crate::internal::encodings::varint::size_for_varint;
use crate::prelude::*;
#[cfg(feature = "write")]
pub(crate) fn compress<'a: 'b, 'b, T>(data: &'a [T], bytes: &mut Vec<u8>, compressors: &'b [Box<dyn Compressor<'a, Data = T>>]) -> ArrayTypeId {
if compressors.len() == 1 {
return compressors[0].compress(data, bytes).unwrap();
}
let restore_point = bytes.len();
let sample_size = data.len().min(256);
let sample = &data[..sample_size];
let mut by_size = Vec::new();
for i in 0..compressors.len() {
let compressor = &compressors[i];
if let Some(size) = compressor.fast_size_for(sample) {
by_size.push((i, size));
} else {
if compressor.compress(sample, bytes).is_ok() {
let size = bytes.len() - restore_point;
by_size.push((i, size));
}
bytes.truncate(restore_point);
}
}
by_size.sort_by_key(|&(_, size)| size);
for ranked in by_size.iter() {
let compressor = &compressors[ranked.0];
if let Ok(ok) = compressor.compress(data, bytes) {
return ok;
}
bytes.truncate(restore_point);
}
panic!("Missing infallable compressor for type");
}
#[cfg(feature = "write")]
pub(crate) trait Compressor<'a> {
type Data;
fn fast_size_for(&self, _data: &[Self::Data]) -> Option<usize> {
None
}
fn compress(&self, data: &[Self::Data], bytes: &mut Vec<u8>) -> Result<ArrayTypeId, ()>;
}
#[cfg(feature = "write")]
pub(crate) struct Utf8Compressor;
#[cfg(feature = "write")]
impl<'a> Compressor<'a> for Utf8Compressor {
type Data = &'a str;
fn fast_size_for(&self, data: &[Self::Data]) -> Option<usize> {
let mut total = 0;
for s in data {
total += size_for_varint(s.len() as u64);
total += s.as_bytes().len();
}
Some(total)
}
fn compress(&self, _data: &[Self::Data], _bytes: &mut Vec<u8>) -> Result<ArrayTypeId, ()> {
todo!("utf8 compress");
}
}
#[cfg(feature = "read")]
pub fn read_all<T>(bytes: &[u8], f: impl Fn(&[u8], &mut usize) -> ReadResult<T>) -> ReadResult<Vec<T>> {
let mut offset = 0;
let mut result = Vec::new();
while offset < bytes.len() {
let read = f(bytes, &mut offset)?;
result.push(read);
}
debug_assert_eq!(offset, bytes.len());
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
use std::fmt::Debug;
#[cfg(all(feature = "read", feature = "write"))]
pub fn round_trip<T: Copy + PartialEq + Debug>(data: &[T], encoder: impl Fn(T, &mut Vec<u8>), decoder: impl Fn(&[u8], &mut usize) -> ReadResult<T>) -> ReadResult<()> {
let mut bytes = Vec::new();
for value in data.iter() {
encoder(*value, &mut bytes);
}
let result = read_all(&bytes, decoder)?;
assert_eq!(&result, &data);
Ok(())
}
}