binary_set/
lib.rs

1use std::{cmp::Ordering, num::NonZeroU32, ops::Deref, path::Path};
2
3use idx_file::{search, AvltrieeOrd, IdxFile, IdxFileAllocator, IdxFileAvlTriee};
4use various_data_file::{DataAddress, VariousDataFile};
5
6type BinaryIdxFile = IdxFile<DataAddress, [u8]>;
7
8pub struct BinarySet {
9    index: BinaryIdxFile,
10    data_file: VariousDataFile,
11}
12
13impl Deref for BinarySet {
14    type Target = BinaryIdxFile;
15    fn deref(&self) -> &Self::Target {
16        &self.index
17    }
18}
19
20impl AsRef<IdxFileAvlTriee<DataAddress, [u8]>> for BinarySet {
21    fn as_ref(&self) -> &IdxFileAvlTriee<DataAddress, [u8]> {
22        self
23    }
24}
25
26impl AvltrieeOrd<DataAddress, [u8], IdxFileAllocator<DataAddress>> for BinarySet {
27    fn cmp(&self, left: &DataAddress, right: &[u8]) -> Ordering {
28        self.data_file.bytes(left).cmp(right)
29    }
30}
31
32impl BinarySet {
33    /// Opens the file and creates the BinarySet.
34    /// /// # Arguments
35    /// * `path` - Path of file to save data
36    /// * `allocation_lot` - Extends the specified size when the file size becomes insufficient due to data addition.
37    /// If you expect to add a lot of data, specifying a larger size will improve performance.
38    pub fn new<P: AsRef<Path>>(path: P, allocation_lot: u32) -> Self {
39        let path = path.as_ref();
40        Self {
41            index: IdxFile::new(path.with_extension("i"), allocation_lot),
42            data_file: VariousDataFile::new(path.with_extension("d")),
43        }
44    }
45
46    /// Returns the value of the specified row. Returns None if the row does not exist.
47    pub fn bytes(&self, row: NonZeroU32) -> Option<&[u8]> {
48        self.index.get(row).map(|v| self.data_file.bytes(v))
49    }
50
51    /// Search for a sequence of bytes.
52    pub fn row(&self, content: &[u8]) -> Option<NonZeroU32> {
53        let found = search::edge(self, content);
54        (found.ord == Ordering::Equal).then(|| found.row).flatten()
55    }
56
57    /// Finds a sequence of bytes, inserts it if it doesn't exist, and returns a row.
58    pub fn row_or_insert(&mut self, content: &[u8]) -> NonZeroU32 {
59        let found = search::edge(self, content);
60        if let (Ordering::Equal, Some(found_row)) = (found.ord, found.row) {
61            found_row
62        } else {
63            let row = unsafe { NonZeroU32::new_unchecked(self.index.rows_count() + 1) };
64            unsafe {
65                self.index.insert_unique_unchecked(
66                    row,
67                    self.data_file.insert(content).address().clone(),
68                    found,
69                );
70            }
71            row
72        }
73    }
74}