1use std::{
2 fs::File,
3 hash::Hasher,
4 io::{self, ErrorKind},
5 marker::PhantomData,
6 path::Path,
7};
8
9#[cfg(feature = "mmap")]
10use memmap2::Mmap;
11
12use crate::util::{ReaderAt, read_tuple};
13
/// Size in bytes of the fixed header at the start of the file: 256 table
/// entries, each made of two 8-byte integers.
pub const HEADER_SIZE: u64 = 256 * 8 * 2;

/// One header entry describing a single hash table.
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct TableEntry {
    /// Byte offset at which the table's first 16-byte slot starts.
    pub(crate) offset: u64,
    /// Number of slots in the table; zero means the table is empty.
    pub(crate) length: u64,
}
28
29pub struct Cdb<R, H> {
63 pub(crate) reader: R,
64 pub(crate) header: [TableEntry; 256],
65 _hasher: PhantomData<H>,
66 #[cfg(feature = "mmap")]
67 mmap: Option<Mmap>,
68}
69
70impl<H: Hasher + Default> Cdb<File, H> {
71 pub fn open(path: impl AsRef<Path>) -> io::Result<Self> {
76 let file = File::open(path)?;
77 Self::new(file)
78 }
79
80 #[cfg(feature = "mmap")]
107 pub fn open_mmap(path: impl AsRef<Path>) -> io::Result<Self> {
108 let file = File::open(path)?;
109 let mmap = unsafe { Mmap::map(&file)? };
110 let mut cdb = Cdb {
111 reader: file, header: [TableEntry::default(); 256],
113 _hasher: PhantomData,
114 mmap: Some(mmap),
115 };
116 cdb.read_header_from_mmap()?; Ok(cdb)
118 }
119}
120
121impl<R: ReaderAt, H: Hasher + Default> Cdb<R, H> {
122 pub fn new(reader: R) -> io::Result<Self> {
126 let mut cdb = Cdb {
127 reader,
128 header: [TableEntry::default(); 256],
129 _hasher: PhantomData,
130 #[cfg(feature = "mmap")]
131 mmap: None, };
133 cdb.read_header()?;
134 Ok(cdb)
135 }
136
137 fn read_header(&mut self) -> io::Result<()> {
139 #[cfg(feature = "mmap")]
140 if let Some(mmap_ref) = self.mmap.as_ref() {
141 self.header = Self::read_header_from_mmap_internal(mmap_ref)?;
142 return Ok(());
143 }
144 let mut header_buf = [0u8; HEADER_SIZE as usize];
146 self.reader.read_exact_at(&mut header_buf, 0)?;
147
148 for i in 0..256 {
149 let offset_bytes: [u8; 8] =
150 header_buf[i * 16..i * 16 + 8].try_into().map_err(|_| {
151 io::Error::new(ErrorKind::InvalidData, "Failed to slice offset from header")
152 })?;
153 let length_bytes: [u8; 8] =
154 header_buf[i * 16 + 8..i * 16 + 16]
155 .try_into()
156 .map_err(|_| {
157 io::Error::new(ErrorKind::InvalidData, "Failed to slice length from header")
158 })?;
159
160 self.header[i] = TableEntry {
161 offset: u64::from_le_bytes(offset_bytes),
162 length: u64::from_le_bytes(length_bytes),
163 };
164 }
165 Ok(())
166 }
167
168 #[cfg(feature = "mmap")]
169 fn read_header_from_mmap(&mut self) -> io::Result<()> {
170 if let Some(mmap_ref) = self.mmap.as_ref() {
171 self.header = Self::read_header_from_mmap_internal(mmap_ref)?;
172 Ok(())
173 } else {
174 Err(io::Error::other("Mmap not available for reading header"))
175 }
176 }
177
178 #[cfg(feature = "mmap")]
179 fn read_header_from_mmap_internal(mmap_ref: &Mmap) -> io::Result<[TableEntry; 256]> {
180 if mmap_ref.len() < HEADER_SIZE as usize {
181 return Err(io::Error::other("Mmap data is smaller than header size"));
182 }
183 let header_buf = &mmap_ref[0..HEADER_SIZE as usize];
184 let mut header = [TableEntry::default(); 256];
185
186 for i in 0..256 {
187 let offset_bytes: [u8; 8] =
188 header_buf[i * 16..i * 16 + 8].try_into().map_err(|_| {
189 io::Error::new(
190 ErrorKind::InvalidData,
191 "Failed to slice offset from mmap header",
192 )
193 })?;
194 let length_bytes: [u8; 8] =
195 header_buf[i * 16 + 8..i * 16 + 16]
196 .try_into()
197 .map_err(|_| {
198 io::Error::new(
199 ErrorKind::InvalidData,
200 "Failed to slice length from mmap header",
201 )
202 })?;
203
204 header[i] = TableEntry {
205 offset: u64::from_le_bytes(offset_bytes),
206 length: u64::from_le_bytes(length_bytes),
207 };
208 }
209 Ok(header)
210 }
211
212 pub fn get(&self, key: &[u8]) -> io::Result<Option<Vec<u8>>> {
254 let mut hasher = H::default();
255 hasher.write(key);
256 let hash_val = hasher.finish();
257
258 let table_idx = (hash_val & 0xff) as usize;
259 let table_entry = self.header[table_idx];
260
261 if table_entry.length == 0 {
262 return Ok(None);
263 }
264
265 let starting_slot = (hash_val >> 8) % table_entry.length;
266
267 for i in 0..table_entry.length {
268 let slot_to_check = (starting_slot + i) % table_entry.length;
269 let slot_offset = table_entry.offset + slot_to_check * 16;
270
271 #[cfg(feature = "mmap")]
272 let (entry_hash, data_offset) = if let Some(mmap_ref) = self.mmap.as_ref() {
273 read_tuple_from_mmap(mmap_ref, slot_offset)?
274 } else {
275 let mut slot_buffer = [0u8; 16];
276 self.reader.read_exact_at(&mut slot_buffer, slot_offset)?;
277 let h = u64::from_le_bytes(slot_buffer[0..8].try_into().map_err(|_| {
278 io::Error::new(
279 ErrorKind::InvalidData,
280 "Failed to slice entry_hash from slot",
281 )
282 })?);
283 let d = u64::from_le_bytes(slot_buffer[8..16].try_into().map_err(|_| {
284 io::Error::new(
285 ErrorKind::InvalidData,
286 "Failed to slice data_offset from slot",
287 )
288 })?);
289 (h, d)
290 };
291
292 #[cfg(not(feature = "mmap"))]
293 let (entry_hash, data_offset) = {
294 let mut slot_buffer = [0u8; 16];
295 self.reader.read_exact_at(&mut slot_buffer, slot_offset)?;
296 let h = u64::from_le_bytes(slot_buffer[0..8].try_into().map_err(|_| {
297 io::Error::new(
298 ErrorKind::InvalidData,
299 "Failed to slice entry_hash from slot",
300 )
301 })?);
302 let d = u64::from_le_bytes(slot_buffer[8..16].try_into().map_err(|_| {
303 io::Error::new(
304 ErrorKind::InvalidData,
305 "Failed to slice data_offset from slot",
306 )
307 })?);
308 (h, d)
309 };
310
311 if entry_hash == 0 && data_offset == 0 {
312 return Ok(None);
313 }
314
315 if entry_hash == hash_val {
316 match self.get_value_at(data_offset, key)? {
317 Some(value) => return Ok(Some(value)),
318 None => continue,
319 }
320 }
321 }
322 Ok(None)
323 }
324
325 fn get_value_at(&self, data_offset: u64, expected_key: &[u8]) -> io::Result<Option<Vec<u8>>> {
328 #[cfg(feature = "mmap")]
329 if let Some(mmap_ref) = self.mmap.as_ref() {
330 return self.get_value_at_mmap(mmap_ref, data_offset, expected_key);
331 }
332
333 let (key_len, val_len) = read_tuple(&self.reader, data_offset)?;
334
335 if key_len as usize != expected_key.len() {
336 return Ok(None);
337 }
338
339 if expected_key.is_empty() {
340 let mut value_buf = vec![0u8; val_len as usize];
341 if val_len > 0 {
342 self.reader
343 .read_exact_at(&mut value_buf, data_offset + 16)?;
344 }
345
346 return Ok(Some(value_buf));
347 }
348
349 let mut key_buf = vec![0u8; key_len as usize];
350 self.reader.read_exact_at(&mut key_buf, data_offset + 16)?;
351
352 if key_buf != expected_key {
353 return Ok(None);
354 }
355
356 let mut value_buf = vec![0u8; val_len as usize];
357 if val_len > 0 {
358 self.reader
359 .read_exact_at(&mut value_buf, data_offset + 16 + key_len)?;
360 }
361 Ok(Some(value_buf))
362 }
363
364 #[cfg(feature = "mmap")]
365 fn get_value_at_mmap(
366 &self,
367 mmap_ref: &Mmap,
368 data_offset: u64,
369 expected_key: &[u8],
370 ) -> io::Result<Option<Vec<u8>>> {
371 let len_offset_usize = data_offset as usize;
372 if len_offset_usize + 16 > mmap_ref.len() {
373 return Err(io::Error::new(
374 ErrorKind::UnexpectedEof,
375 "Mmap bounds exceeded for key/value lengths",
376 ));
377 }
378
379 let key_len_bytes: [u8; 8] = mmap_ref[len_offset_usize..len_offset_usize + 8]
380 .try_into()
381 .map_err(|_| {
382 io::Error::new(ErrorKind::InvalidData, "Failed to slice key_len from mmap")
383 })?;
384 let val_len_bytes: [u8; 8] = mmap_ref[len_offset_usize + 8..len_offset_usize + 16]
385 .try_into()
386 .map_err(|_| {
387 io::Error::new(ErrorKind::InvalidData, "Failed to slice val_len from mmap")
388 })?;
389
390 let key_len = u64::from_le_bytes(key_len_bytes);
391 let val_len = u64::from_le_bytes(val_len_bytes);
392
393 if key_len as usize != expected_key.len() {
394 return Ok(None);
395 }
396
397 if expected_key.is_empty() {
398 let value_buf = if val_len > 0 {
399 let start = (data_offset + 16) as usize;
400 let end = start + val_len as usize;
401 if end > mmap_ref.len() {
402 return Err(io::Error::new(
403 ErrorKind::InvalidData,
404 "Mmap bounds exceeded for value",
405 ));
406 }
407 mmap_ref[start..end].to_vec()
408 } else {
409 Vec::new()
410 };
411 return Ok(Some(value_buf));
412 }
413
414 let key_start = (data_offset + 16) as usize;
415 let key_end = key_start + key_len as usize;
416
417 if key_end > mmap_ref.len() {
418 return Err(io::Error::new(
419 ErrorKind::InvalidData,
420 "Mmap bounds exceeded for key",
421 ));
422 }
423 let key_buf_slice = &mmap_ref[key_start..key_end];
424
425 if key_buf_slice != expected_key {
426 return Ok(None);
427 }
428
429 let value_buf = if val_len > 0 {
430 let val_start = key_end;
431 let val_end = val_start + val_len as usize;
432 if val_end > mmap_ref.len() {
433 return Err(io::Error::new(
434 ErrorKind::InvalidData,
435 "Mmap bounds exceeded for value",
436 ));
437 }
438 mmap_ref[val_start..val_end].to_vec()
439 } else {
440 Vec::new()
441 };
442
443 Ok(Some(value_buf))
444 }
445
446 pub fn iter(&self) -> crate::iterator::CdbIterator<'_, R, H> {
450 crate::iterator::CdbIterator::new(self)
451 }
452}
453
/// Reads two consecutive little-endian `u64` values starting at `offset`
/// inside `mmap`.
///
/// # Errors
///
/// Returns `ErrorKind::UnexpectedEof` when the 16-byte range does not lie
/// fully inside the mapping. The range is computed with checked arithmetic, so
/// an `offset` near the integer limits is reported as an error rather than
/// wrapping around and panicking on the slice.
#[cfg(feature = "mmap")]
fn read_tuple_from_mmap(mmap: &Mmap, offset: u64) -> io::Result<(u64, u64)> {
    let data: &[u8] = mmap;

    let bytes = usize::try_from(offset)
        .ok()
        .and_then(|start| Some(start..start.checked_add(16)?))
        .and_then(|range| data.get(range))
        .ok_or_else(|| {
            io::Error::new(
                ErrorKind::UnexpectedEof,
                "Attempted to read beyond mmap bounds for tuple",
            )
        })?;

    let first = u64::from_le_bytes(bytes[..8].try_into().map_err(|_| {
        io::Error::new(
            ErrorKind::InvalidData,
            "Failed to slice first u64 from mmap",
        )
    })?);
    let second = u64::from_le_bytes(bytes[8..].try_into().map_err(|_| {
        io::Error::new(
            ErrorKind::InvalidData,
            "Failed to slice second u64 from mmap",
        )
    })?);

    Ok((first, second))
}
482
483#[cfg(test)]
484mod tests {
485 use super::*;
486 use crate::{hash::CdbHash, writer::CdbWriter};
487 #[cfg(feature = "mmap")]
488 use std::io::Write;
489 use std::{hash::Hasher as StdHasher, io::Cursor};
490 use tempfile::NamedTempFile;
491
492 fn create_in_memory_cdb_with_hasher<H: Hasher + Default>(
493 records: &[(&[u8], &[u8])],
494 ) -> Cdb<Cursor<Vec<u8>>, H> {
495 let mut writer = CdbWriter::<_, H>::new(Cursor::new(Vec::new())).unwrap();
496 for (key, value) in records {
497 writer.put(key, value).unwrap();
498 }
499 writer.finalize().unwrap();
500 let cursor = writer.into_inner().unwrap();
501 Cdb::<_, H>::new(cursor).unwrap()
502 }
503
504 fn create_in_memory_cdb(records: &[(&[u8], &[u8])]) -> Cdb<Cursor<Vec<u8>>, CdbHash> {
505 create_in_memory_cdb_with_hasher::<CdbHash>(records)
506 }
507
508 #[test]
509 fn test_cdb_new_and_get_simple() {
510 let records = vec![
511 (b"key1".as_ref(), b"value1".as_ref()),
512 (b"key2".as_ref(), b"value2".as_ref()),
513 ];
514 let cdb = create_in_memory_cdb(&records);
515
516 assert_eq!(cdb.get(b"key1").unwrap().unwrap(), b"value1");
517 assert_eq!(cdb.get(b"key2").unwrap().unwrap(), b"value2");
518 assert!(cdb.get(b"key3").unwrap().is_none());
519 }
520
521 #[test]
522 fn test_cdb_get_empty_key() {
523 let records = vec![(b"".as_ref(), b"empty_value".as_ref())];
524 let cdb = create_in_memory_cdb(&records);
525 assert_eq!(cdb.get(b"").unwrap().unwrap(), b"empty_value");
526 }
527
528 #[test]
529 fn test_cdb_get_empty_value() {
530 let records = vec![(b"key_empty_val".as_ref(), b"".as_ref())];
531 let cdb = create_in_memory_cdb(&records);
532 assert_eq!(cdb.get(b"key_empty_val").unwrap().unwrap(), b"");
533 }
534
535 #[test]
536 fn test_cdb_get_empty_key_and_value() {
537 let records = vec![(b"".as_ref(), b"".as_ref())];
538 let cdb = create_in_memory_cdb(&records);
539 assert_eq!(cdb.get(b"").unwrap().unwrap(), b"");
540 }
541
542 #[test]
543 fn test_cdb_get_from_empty_db() {
544 let cdb = create_in_memory_cdb(&[]);
545 assert!(cdb.get(b"any_key").unwrap().is_none());
546 }
547
548 #[test]
549 fn test_cdb_open_non_existent_file() {
550 let result = Cdb::<File, CdbHash>::open("non_existent_file.cdb");
551 assert!(result.is_err());
552 assert_eq!(result.err().unwrap().kind(), ErrorKind::NotFound);
553 }
554
555 #[test]
556 fn test_cdb_open_and_get_from_file() {
557 let temp_file = NamedTempFile::new().unwrap();
558 let path = temp_file.path();
559
560 {
561 let file = File::create(path).unwrap();
562 let mut writer = CdbWriter::<_, CdbHash>::new(file).unwrap();
563 writer.put(b"file_key", b"file_value").unwrap();
564 writer.finalize().unwrap();
565 }
566
567 let cdb = Cdb::<File, CdbHash>::open(path).unwrap();
568 assert_eq!(cdb.get(b"file_key").unwrap().unwrap(), b"file_value");
569 assert!(cdb.get(b"other_key").unwrap().is_none());
570
571 #[cfg(feature = "mmap")]
572 {
573 let cdb_mmap = Cdb::<File, CdbHash>::open_mmap(path).unwrap();
574 assert_eq!(cdb_mmap.get(b"file_key").unwrap().unwrap(), b"file_value");
575 assert!(cdb_mmap.get(b"other_key").unwrap().is_none());
576 }
577 }
578
579 #[derive(Clone, Default)]
580 struct CollisionHasher {
581 state: u64,
582 }
583
584 impl StdHasher for CollisionHasher {
585 fn finish(&self) -> u64 {
586 if self.state == u64::from_le_bytes(*b"key_A ") {
587 0x0102030405060708
588 } else if self.state == u64::from_le_bytes(*b"key_B ") {
589 0x1112131415161718
590 } else if self.state == u64::from_le_bytes(*b"key_C ") {
591 0x0102030405060708
592 } else {
593 self.state
594 }
595 }
596
597 fn write(&mut self, bytes: &[u8]) {
598 if bytes.len() <= 8 {
599 let mut arr = [0u8; 8];
600 arr[..bytes.len()].copy_from_slice(bytes);
601 self.state = u64::from_le_bytes(arr);
602 } else {
603 self.state = 0xDEADBEEFCAFEFACE;
604 }
605 }
606 }
607
608 #[test]
609 fn test_cdb_get_with_hash_collision() {
610 let records = [
611 (b"key_A".as_ref(), b"value_A".as_ref()),
612 (b"key_B".as_ref(), b"value_B".as_ref()),
613 (b"key_C".as_ref(), b"value_C".as_ref()),
614 ];
615 let cdb = create_in_memory_cdb_with_hasher::<CollisionHasher>(&records);
616
617 assert_eq!(cdb.get(b"key_A").unwrap().unwrap(), b"value_A");
618 assert_eq!(cdb.get(b"key_B").unwrap().unwrap(), b"value_B");
619 assert_eq!(cdb.get(b"key_C").unwrap().unwrap(), b"value_C");
620 assert!(cdb.get(b"key_D").unwrap().is_none());
621 }
622
623 #[test]
624 fn test_read_header_invalid_data_short() {
625 let data = vec![0u8; HEADER_SIZE as usize - 10];
626 let cursor = Cursor::new(data.clone());
627 let result = Cdb::<_, CdbHash>::new(cursor);
628 assert!(result.is_err());
629 assert_eq!(result.err().unwrap().kind(), ErrorKind::UnexpectedEof);
630
631 #[cfg(feature = "mmap")]
632 {
633 let temp_file = NamedTempFile::new().unwrap();
634 let path = temp_file.path();
635 {
636 let mut file = File::create(path).unwrap();
637 file.write_all(&data).unwrap();
638 }
639 let result_mmap = Cdb::<File, CdbHash>::open_mmap(path);
640 assert!(result_mmap.is_err());
641 let err_kind = result_mmap.err().unwrap().kind();
642 assert!(
643 err_kind == ErrorKind::InvalidData || err_kind == ErrorKind::Other,
644 "Unexpected error kind: {:?}",
645 err_kind
646 );
647 }
648 }
649
650 #[test]
651 fn test_header_size_value() {
652 assert_eq!(HEADER_SIZE, 256 * 8 * 2);
653 }
654}