1use std::collections::BTreeSet;
2use std::io::{Seek, SeekFrom, Write};
3use std::mem;
4
5use super::*;
6
7#[derive(Copy, Clone, Debug)]
8struct HashPos(Hash, u32);
9
10impl HashPos {
11 #[inline]
12 fn distance(self, tlen: usize, pos: usize) -> usize {
13 let startslot = self.0.slot(tlen);
14 pos.checked_sub(startslot)
15 .unwrap_or_else(|| pos + tlen - startslot)
16 }
17}
18
19const FILLFACTOR: usize = 2;
20
21pub struct Writer<T> {
22 file: T,
23 pos: u64,
24 tables: Vec<Vec<HashPos>>,
25 header: [PosLen; ENTRIES],
26}
27
28impl<T> Writer<T>
29where
30 T: Write + Seek,
31{
32 pub fn new(mut file: T) -> Result<Self, std::io::Error> {
33 let pos = (ENTRIES * PAIR_SIZE) as u64;
34 let tables = vec![Vec::new(); ENTRIES];
35 file.seek(SeekFrom::Start(pos))?;
36
37 Ok(Writer {
38 file,
39 pos,
40 tables,
41 header: [PosLen { pos: 0, len: 0 }; ENTRIES],
42 })
43 }
44
45 fn write_kv(&mut self, k: &[u8], v: &[u8]) -> Result<(), std::io::Error> {
46 self.file.write_all(&(k.len() as u32).to_le_bytes())?;
47 self.file.write_all(&(v.len() as u32).to_le_bytes())?;
48 self.file.write_all(k)?;
49 self.file.write_all(v)?;
50
51 self.pos += (PAIR_SIZE + k.len() + v.len()) as u64;
52
53 Ok(())
54 }
55
56 pub fn write(&mut self, k: &[u8], v: &[u8]) -> Result<(), std::io::Error> {
57 let hash = Hash::new(k);
58 let tableidx = hash.table();
59
60 self.tables[tableidx].push(HashPos(hash, self.pos as u32));
61
62 self.write_kv(k, v)?;
63
64 Ok(())
65 }
66
67 fn write_header(&mut self) -> Result<(), std::io::Error> {
68 self.file.seek(SeekFrom::Start(0))?;
69
70 for header in self.header.iter() {
71 self.file.write_all(&(header.pos as u32).to_le_bytes())?;
72 self.file.write_all(&(header.len as u32).to_le_bytes())?;
73 }
74
75 Ok(())
76 }
77
78 fn finish_generic<F>(mut self, fill: F) -> Result<T, std::io::Error>
79 where
80 F: Fn(&[HashPos], &mut Vec<HashPos>),
81 {
82 let mut tout = Vec::new();
83
84 for (i, table) in self.tables.iter().enumerate() {
85 fill(&table, &mut tout);
86 self.header[i] = PosLen {
87 pos: self.pos as usize,
88 len: tout.len(),
89 };
90 for row in &tout {
91 let hash: u32 = row.0.into();
92 self.file.write_all(&hash.to_le_bytes())?;
93 self.file.write_all(&row.1.to_le_bytes())?;
94 }
95 self.pos += (PAIR_SIZE * tout.len()) as u64;
96 }
97 self.write_header()?;
98 self.file.flush()?;
99
100 Ok(self.file)
101 }
102
103 pub fn finish(self) -> Result<T, std::io::Error> {
104 self.finish_robinhood()
105 }
106
107 pub fn finish_naive(self) -> Result<T, std::io::Error> {
108 self.finish_generic(fill_table_naive)
109 }
110
111 pub fn finish_btree(self) -> Result<T, std::io::Error> {
112 self.finish_generic(fill_table_btree)
113 }
114
115 pub fn finish_robinhood(self) -> Result<T, std::io::Error> {
116 self.finish_generic(fill_table_robinhood)
117 }
118
119 pub fn into_file(self) -> T {
120 self.file
121 }
122
123 pub fn file(&self) -> &T {
124 &self.file
125 }
126}
127
128fn fill_table_naive(input: &[HashPos], output: &mut Vec<HashPos>) {
129 let tlen = input.len() * FILLFACTOR;
130 output.clear();
131 output.resize(tlen, HashPos(Hash(0), 0));
132
133 for hp in input {
134 let (left, right) = output.split_at_mut(hp.0.slot(tlen));
135
136 for slot in right.iter_mut().chain(left.iter_mut()) {
137 if slot.1 == 0 {
138 *slot = *hp;
139 break;
140 }
141 }
142 }
143}
144
145fn fill_table_btree(input: &[HashPos], output: &mut Vec<HashPos>) {
146 let mut cache = BTreeSet::new();
147 let tlen = input.len() * FILLFACTOR;
148 output.clear();
149 output.resize(tlen, HashPos(Hash(0), 0));
150
151 cache.extend(0..tlen);
152
153 for hp in input {
154 let startpos = hp.0.slot(tlen);
155 let idx = *cache
156 .range(startpos..)
157 .chain(cache.range(0..startpos))
158 .next()
159 .unwrap();
160 cache.take(&idx);
161
162 debug_assert_eq!(output[idx].1, 0);
163 output[idx] = *hp;
164 }
165}
166
167fn fill_table_robinhood(input: &[HashPos], output: &mut Vec<HashPos>) {
168 let tlen = input.len() * FILLFACTOR;
169 output.clear();
170 output.resize(tlen, HashPos(Hash(0), 0));
171
172 for mut hp in input.iter().cloned() {
173 let startslot = hp.0.slot(tlen);
174 let (left, right) = output.split_at_mut(startslot);
175 let mut slotnum = startslot;
176 let mut distance = 0;
177
178 for slot in right.iter_mut().chain(left.iter_mut()) {
179 if slot.1 == 0 {
180 *slot = hp;
181 break;
182 } else if slot.distance(tlen, slotnum) < distance {
183 mem::swap(slot, &mut hp);
184 distance = hp.distance(tlen, slotnum);
185 }
186 distance += 1;
187 slotnum = (slotnum + 1) % tlen;
188 }
189 }
190}