1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
use std::io::{Read, Write};
use crate::de::{BinDeserializer, BinDeserializerBase};
use crate::serde::UsizeLen;
use crate::Result;
use crate::{BinDeserialize, BinSerializer, BinSerializerBase, Mode};
use crate::util::serialize_iter;
const DEDUP_MODE: Mode = Mode {
usize_len: UsizeLen::Variable,
dedup_idx: UsizeLen::Variable,
fixed_size_use_varint: false,
use_dedup: false,
};
pub struct DedupContext {
strings: Vec<(String, usize)>,
by_index: Vec<usize>,
}
impl DedupContext {
pub fn new() -> Self {
DedupContext {
strings: Vec::new(),
by_index: Vec::new(),
}
}
pub fn put_str(&mut self, s: &str) -> usize {
match self.strings.binary_search_by(|el| (*el.0).cmp(s)) {
Ok(idx) => self.strings[idx].1,
Err(idx) => {
let l = self.strings.len();
self.strings.insert(idx, (s.to_string(), l));
l
}
}
}
pub fn get_str(&self, idx: usize) -> Option<&str> {
self.by_index.get(idx).map(|el| &*self.strings[*el].0)
}
pub fn write_to<W: Write>(&self, pipe: W) -> Result<()> {
let ser = BinSerializerBase::new(pipe).with_mode(DEDUP_MODE);
let mut by_index: Vec<_> = self.strings.iter().collect();
by_index.sort_unstable_by_key(|el| el.1);
serialize_iter(by_index.into_iter().map(|el| &el.0), ser)?;
Ok(())
}
pub fn read_from<R: Read>(pipe: R) -> Result<Self> {
let empty = DedupContext::new();
let de = BinDeserializerBase::new(pipe, &empty).with_mode(DEDUP_MODE);
let by_index: Vec<String> = Vec::deserialize(de)?;
let mut strings: Vec<_> = by_index
.into_iter()
.enumerate()
.map(|(idx, s)| (s, idx))
.collect();
strings.sort_unstable_by(|a, b| a.0.cmp(&b.0));
let mut by_index = vec![0; strings.len()];
for (idx, el) in strings.iter().enumerate() {
by_index[el.1] = idx;
}
Ok(DedupContext { strings, by_index })
}
}