cordoba/
write.rs

1use std::collections::BTreeSet;
2use std::io::{Seek, SeekFrom, Write};
3use std::mem;
4
5use super::*;
6
7#[derive(Copy, Clone, Debug)]
8struct HashPos(Hash, u32);
9
10impl HashPos {
11    #[inline]
12    fn distance(self, tlen: usize, pos: usize) -> usize {
13        let startslot = self.0.slot(tlen);
14        pos.checked_sub(startslot)
15            .unwrap_or_else(|| pos + tlen - startslot)
16    }
17}
18
19const FILLFACTOR: usize = 2;
20
21pub struct Writer<T> {
22    file: T,
23    pos: u64,
24    tables: Vec<Vec<HashPos>>,
25    header: [PosLen; ENTRIES],
26}
27
28impl<T> Writer<T>
29where
30    T: Write + Seek,
31{
32    pub fn new(mut file: T) -> Result<Self, std::io::Error> {
33        let pos = (ENTRIES * PAIR_SIZE) as u64;
34        let tables = vec![Vec::new(); ENTRIES];
35        file.seek(SeekFrom::Start(pos))?;
36
37        Ok(Writer {
38            file,
39            pos,
40            tables,
41            header: [PosLen { pos: 0, len: 0 }; ENTRIES],
42        })
43    }
44
45    fn write_kv(&mut self, k: &[u8], v: &[u8]) -> Result<(), std::io::Error> {
46        self.file.write_all(&(k.len() as u32).to_le_bytes())?;
47        self.file.write_all(&(v.len() as u32).to_le_bytes())?;
48        self.file.write_all(k)?;
49        self.file.write_all(v)?;
50
51        self.pos += (PAIR_SIZE + k.len() + v.len()) as u64;
52
53        Ok(())
54    }
55
56    pub fn write(&mut self, k: &[u8], v: &[u8]) -> Result<(), std::io::Error> {
57        let hash = Hash::new(k);
58        let tableidx = hash.table();
59
60        self.tables[tableidx].push(HashPos(hash, self.pos as u32));
61
62        self.write_kv(k, v)?;
63
64        Ok(())
65    }
66
67    fn write_header(&mut self) -> Result<(), std::io::Error> {
68        self.file.seek(SeekFrom::Start(0))?;
69
70        for header in self.header.iter() {
71            self.file.write_all(&(header.pos as u32).to_le_bytes())?;
72            self.file.write_all(&(header.len as u32).to_le_bytes())?;
73        }
74
75        Ok(())
76    }
77
78    fn finish_generic<F>(mut self, fill: F) -> Result<T, std::io::Error>
79    where
80        F: Fn(&[HashPos], &mut Vec<HashPos>),
81    {
82        let mut tout = Vec::new();
83
84        for (i, table) in self.tables.iter().enumerate() {
85            fill(&table, &mut tout);
86            self.header[i] = PosLen {
87                pos: self.pos as usize,
88                len: tout.len(),
89            };
90            for row in &tout {
91                let hash: u32 = row.0.into();
92                self.file.write_all(&hash.to_le_bytes())?;
93                self.file.write_all(&row.1.to_le_bytes())?;
94            }
95            self.pos += (PAIR_SIZE * tout.len()) as u64;
96        }
97        self.write_header()?;
98        self.file.flush()?;
99
100        Ok(self.file)
101    }
102
103    pub fn finish(self) -> Result<T, std::io::Error> {
104        self.finish_robinhood()
105    }
106
107    pub fn finish_naive(self) -> Result<T, std::io::Error> {
108        self.finish_generic(fill_table_naive)
109    }
110
111    pub fn finish_btree(self) -> Result<T, std::io::Error> {
112        self.finish_generic(fill_table_btree)
113    }
114
115    pub fn finish_robinhood(self) -> Result<T, std::io::Error> {
116        self.finish_generic(fill_table_robinhood)
117    }
118
119    pub fn into_file(self) -> T {
120        self.file
121    }
122
123    pub fn file(&self) -> &T {
124        &self.file
125    }
126}
127
128fn fill_table_naive(input: &[HashPos], output: &mut Vec<HashPos>) {
129    let tlen = input.len() * FILLFACTOR;
130    output.clear();
131    output.resize(tlen, HashPos(Hash(0), 0));
132
133    for hp in input {
134        let (left, right) = output.split_at_mut(hp.0.slot(tlen));
135
136        for slot in right.iter_mut().chain(left.iter_mut()) {
137            if slot.1 == 0 {
138                *slot = *hp;
139                break;
140            }
141        }
142    }
143}
144
145fn fill_table_btree(input: &[HashPos], output: &mut Vec<HashPos>) {
146    let mut cache = BTreeSet::new();
147    let tlen = input.len() * FILLFACTOR;
148    output.clear();
149    output.resize(tlen, HashPos(Hash(0), 0));
150
151    cache.extend(0..tlen);
152
153    for hp in input {
154        let startpos = hp.0.slot(tlen);
155        let idx = *cache
156            .range(startpos..)
157            .chain(cache.range(0..startpos))
158            .next()
159            .unwrap();
160        cache.take(&idx);
161
162        debug_assert_eq!(output[idx].1, 0);
163        output[idx] = *hp;
164    }
165}
166
167fn fill_table_robinhood(input: &[HashPos], output: &mut Vec<HashPos>) {
168    let tlen = input.len() * FILLFACTOR;
169    output.clear();
170    output.resize(tlen, HashPos(Hash(0), 0));
171
172    for mut hp in input.iter().cloned() {
173        let startslot = hp.0.slot(tlen);
174        let (left, right) = output.split_at_mut(startslot);
175        let mut slotnum = startslot;
176        let mut distance = 0;
177
178        for slot in right.iter_mut().chain(left.iter_mut()) {
179            if slot.1 == 0 {
180                *slot = hp;
181                break;
182            } else if slot.distance(tlen, slotnum) < distance {
183                mem::swap(slot, &mut hp);
184                distance = hp.distance(tlen, slotnum);
185            }
186            distance += 1;
187            slotnum = (slotnum + 1) % tlen;
188        }
189    }
190}