cdb2/
reader.rs

1use std::cmp::min;
2use std::fs::File;
3use std::io;
4use std::path;
5
6use memmap2::Mmap;
7
8use crate::hash::hash;
9use crate::uint32;
10
11pub use std::io::Result;
12
13const KEYSIZE: usize = 32;
14
/// CDB file reader
///
/// Wraps a memory map of the database file. Lookups ([`CDB::get`],
/// [`CDB::find`]) and full scans ([`CDB::iter`]) read straight from the map
/// and copy out only the values they return.
///
/// # Example
///
/// ```
/// # fn main() -> std::io::Result<()> {
/// use cdb2::CDB;
///
/// let cdb = CDB::open("tests/test1.cdb")?;
/// for result in cdb.find(b"one") {
///     println!("{:?}", result?);
/// }
/// # Ok(())
/// # }
/// ```
pub struct CDB {
    // Memory map covering the whole database file.
    file: Mmap,
    // Length of the mapped file in bytes, captured once when the file is
    // opened; every read is bounds-checked against this value.
    size: usize,
}
34
/// Builds the error reported whenever the database contents are malformed.
///
/// Generic over `T` so it can be returned from any function that yields this
/// module's `Result`.
fn err_badfile<T>() -> Result<T> {
    let error = io::Error::other("Invalid file format");
    Err(error)
}
38
39impl CDB {
40    /// Opens the named file and returns the CDB reader.
41    ///
42    /// # Examples
43    ///
44    /// ```
45    /// # fn main() -> std::io::Result<()> {
46    /// use cdb2::CDB;
47    ///
48    /// let cdb = CDB::open("tests/test1.cdb")?;
49    /// # Ok(())
50    /// # }
51    /// ```
52    pub fn open<P: AsRef<path::Path>>(filename: P) -> Result<CDB> {
53        let file = File::open(filename)?;
54        let file = unsafe { Mmap::map(&file)? };
55        if file.len() < 2048 || file.len() > 0xffffffff {
56            return err_badfile();
57        }
58        let size = file.len();
59        Ok(CDB { file, size })
60    }
61
62    fn read(&self, buf: &mut [u8], pos: u32) -> Result<usize> {
63        let len = buf.len();
64        let pos = pos as usize;
65        if pos + len > self.size {
66            return err_badfile();
67        }
68        buf.copy_from_slice(&self.file[pos..pos + len]);
69        Ok(len)
70    }
71
72    fn hash_table(&self, khash: u32) -> (u32, u32, u32) {
73        let x = ((khash as usize) & 0xff) << 3;
74        let (hpos, hslots) = uint32::unpack2(&self.file[x..x + 8]);
75        let kpos = if hslots > 0 {
76            hpos.wrapping_add(((khash >> 8) % hslots) << 3)
77        } else {
78            0
79        };
80        (hpos, hslots, kpos)
81    }
82
83    fn match_key(&self, key: &[u8], pos: u32) -> Result<bool> {
84        let mut buf = [0_u8; KEYSIZE];
85        let mut len = key.len();
86        let mut pos = pos;
87        let mut keypos = 0;
88
89        while len > 0 {
90            let n = min(len, buf.len());
91            self.read(&mut buf[..n], pos)?;
92            if buf[..n] != key[keypos..keypos + n] {
93                return Ok(false);
94            }
95            pos += n as u32;
96            keypos += n;
97            len -= n;
98        }
99        Ok(true)
100    }
101
102    /// Find the first record with the named key.
103    ///
104    /// # Examples
105    ///
106    /// ```
107    /// # fn main() -> std::io::Result<()> {
108    /// use cdb2::CDB;
109    ///
110    /// let cdb = CDB::open("tests/test1.cdb")?;
111    /// if let Some(record) = cdb.get(b"one") {
112    ///     println!("{:?}", record?);
113    /// }
114    /// # Ok(())
115    /// # }
116    /// ```
117    pub fn get(&self, key: &[u8]) -> Option<Result<Vec<u8>>> {
118        self.find(key).next()
119    }
120
121    /// Find all records with the named key. The returned iterator
122    /// produces each value associated with the key.
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// # fn main() -> std::io::Result<()> {
128    /// use cdb2::CDB;
129    ///
130    /// let cdb = CDB::open("tests/test1.cdb")?;
131    /// for result in cdb.find(b"one") {
132    ///     println!("{:?}", result?);
133    /// }
134    /// # Ok(())
135    /// # }
136    /// ```
137    pub fn find(&self, key: &[u8]) -> CDBValueIter<'_> {
138        CDBValueIter::find(self, key)
139    }
140
141    /// Iterate over all the `(key, value)` pairs in the database.
142    ///
143    /// # Examples
144    ///
145    /// ```
146    /// # fn main() -> std::io::Result<()> {
147    /// use cdb2::CDB;
148    ///
149    /// let cdb = CDB::open("tests/test1.cdb")?;
150    /// for result in cdb.iter() {
151    ///     let (key, value) = result?;
152    ///     println!("{:?} => {:?}", key, value);
153    /// }
154    /// # Ok(())
155    /// # }
156    /// ````
157    pub fn iter(&self) -> CDBKeyValueIter<'_> {
158        CDBKeyValueIter::start(self)
159    }
160}
161
/// Type alias for [`CDBValueIter`], the iterator returned by [`CDB::find`].
pub type CDBIter<'a> = CDBValueIter<'a>;
164
/// Iterator over a set of records in the CDB with the same key.
///
/// Each item is the value of one matching record, or an error if the file
/// contents could not be read.
///
/// See [`CDB::find`]
pub struct CDBValueIter<'a> {
    // Database being searched.
    cdb: &'a CDB,
    // Owned copy of the key being looked up.
    key: Vec<u8>,
    // Hash of `key`; slots whose stored hash differs are skipped.
    khash: u32,
    // Number of slots examined so far; iteration stops at `hslots`.
    kloop: u32,
    // File offset of the next slot to examine.
    kpos: u32,
    // File offset of the hash table selected for this key.
    hpos: u32,
    // Number of slots in that hash table.
    hslots: u32,
    // File offset of the most recently matched record's data.
    dpos: u32,
    // Length in bytes of the most recently matched record's data.
    dlen: u32,
}
179
180impl<'a> CDBValueIter<'a> {
181    fn find(cdb: &'a CDB, key: &[u8]) -> Self {
182        let khash = hash(key);
183        let (hpos, hslots, kpos) = cdb.hash_table(khash);
184
185        CDBValueIter {
186            cdb,
187            key: key.to_vec(),
188            khash,
189            kloop: 0,
190            kpos,
191            hpos,
192            hslots,
193            dpos: 0,
194            dlen: 0,
195        }
196    }
197
198    fn read_vec(&self) -> Result<Vec<u8>> {
199        let mut result = vec![0; self.dlen as usize];
200        self.cdb.read(&mut result[..], self.dpos)?;
201        Ok(result)
202    }
203}
204
// `?` for iterator bodies: evaluates to the `Ok` payload, or makes the
// enclosing function return `Some(Err(..))` carrying the error.
macro_rules! iter_try {
    ( $result:expr ) => {
        match $result {
            Ok(value) => value,
            Err(error) => return Some(Err(error)),
        }
    };
}
215
// Unwraps an `Option` inside an iterator body: evaluates to the `Some`
// payload, or makes the enclosing function return `Some(err_badfile())`.
macro_rules! iter_checked {
    ( $option:expr ) => {
        match $option {
            Some(value) => value,
            None => return Some(err_badfile()),
        }
    };
}
226
227impl<'a> Iterator for CDBValueIter<'a> {
228    type Item = Result<Vec<u8>>;
229    fn next(&mut self) -> Option<Self::Item> {
230        while self.kloop < self.hslots {
231            let mut buf = [0_u8; 8];
232            let kpos = self.kpos;
233            iter_try!(self.cdb.read(&mut buf, kpos));
234            let (khash, pos) = uint32::unpack2(&buf);
235            if pos == 0 {
236                return None;
237            }
238            self.kloop += 1;
239            self.kpos += 8;
240            if self.kpos == iter_checked!(self.hpos.checked_add(self.hslots << 3)) {
241                self.kpos = self.hpos;
242            }
243            if khash == self.khash {
244                iter_try!(self.cdb.read(&mut buf, pos));
245                let (klen, dlen) = uint32::unpack2(&buf);
246                if klen as usize == self.key.len()
247                    && iter_try!(self.cdb.match_key(&self.key[..], pos + 8))
248                {
249                    self.dlen = dlen;
250                    self.dpos = pos + 8 + self.key.len() as u32;
251                    return Some(self.read_vec());
252                }
253            }
254        }
255        None
256    }
257}
258
/// Iterator over all the records in the CDB.
///
/// Each item is one `(key, value)` pair, or an error if a record does not
/// fit inside the data section.
///
/// See [`CDB::iter`]
pub struct CDBKeyValueIter<'a> {
    // Database being scanned.
    cdb: &'a CDB,
    // File offset of the next record header; starts at 2048.
    pos: u32,
    // File offset at which iteration stops: the value stored in the first
    // four bytes of the file, clamped to the file size.
    data_end: u32,
}
267
268impl<'a> CDBKeyValueIter<'a> {
269    fn start(cdb: &'a CDB) -> Self {
270        let data_end = uint32::unpack(&cdb.file[0..4]).min(cdb.size as u32);
271        Self {
272            cdb,
273            pos: 2048,
274            data_end,
275        }
276    }
277}
278
279impl<'a> Iterator for CDBKeyValueIter<'a> {
280    type Item = Result<(Vec<u8>, Vec<u8>)>;
281    fn next(&mut self) -> Option<Self::Item> {
282        if self.pos + 8 >= self.data_end {
283            None
284        } else {
285            let (klen, dlen) =
286                uint32::unpack2(&self.cdb.file[self.pos as usize..self.pos as usize + 8]);
287            let total_len = self
288                .pos
289                .saturating_add(8)
290                .saturating_add(klen)
291                .saturating_add(dlen);
292            if total_len > self.data_end {
293                Some(err_badfile())
294            } else {
295                let kpos = (self.pos + 8) as usize;
296                let dpos = kpos + klen as usize;
297                let mut key = vec![0; klen as usize];
298                let mut value = vec![0; dlen as usize];
299                // Copied from CDB::read
300                key.copy_from_slice(&self.cdb.file[kpos..kpos + klen as usize]);
301                value.copy_from_slice(&self.cdb.file[dpos..dpos + dlen as usize]);
302                self.pos += 8 + klen + dlen;
303                Some(Ok((key, value)))
304            }
305        }
306    }
307}