idx_binary/
lib.rs

1use std::{
2    cmp::Ordering::{self, Equal, Greater, Less},
3    num::NonZeroU32,
4    path::Path,
5};
6
7pub use idx_file::{
8    AvltrieeIter, AvltrieeSearch, AvltrieeUpdate, FileMmap, IdxFile, IdxFileAvlTriee,
9};
10
11use idx_file::{AvltrieeNode, IdxFileAllocator};
12use various_data_file::{DataAddress, VariousDataFile};
13
14type IdxBinaryAvltriee = IdxFileAvlTriee<DataAddress, [u8]>;
15type IdxBinaryAllocator = IdxFileAllocator<DataAddress>;
16
17pub struct IdxBinary {
18    index: IdxFile<DataAddress, [u8]>,
19    data_file: VariousDataFile,
20}
21
22impl AsRef<IdxBinaryAvltriee> for IdxBinary {
23    fn as_ref(&self) -> &IdxBinaryAvltriee {
24        &self.index
25    }
26}
27
28impl AsMut<IdxBinaryAvltriee> for IdxBinary {
29    fn as_mut(&mut self) -> &mut IdxBinaryAvltriee {
30        &mut self.index
31    }
32}
33
34impl AvltrieeSearch<DataAddress, [u8], IdxBinaryAllocator> for IdxBinary {
35    fn cmp(left: &[u8], right: &[u8]) -> Ordering {
36        let mut left = left.into_iter().fuse();
37        let mut right = right.into_iter().fuse();
38
39        let mut l;
40        let mut r;
41        let mut ll;
42        let mut rr;
43
44        macro_rules! to_digit {
45            ($v:expr) => {
46                $v.and_then(|v| {
47                    let v = *v as isize;
48                    (v >= ('0' as isize) && v <= ('9' as isize)).then_some(v - 48)
49                })
50            };
51        }
52
53        macro_rules! read_left {
54            () => {{
55                l = left.next();
56                ll = to_digit!(l);
57            }};
58        }
59
60        macro_rules! read_right {
61            () => {{
62                r = right.next();
63                rr = to_digit!(r);
64            }};
65        }
66
67        macro_rules! return_unless_equal {
68            ($ord:expr) => {
69                match $ord {
70                    Equal => {}
71                    lastcmp => return lastcmp,
72                }
73            };
74        }
75
76        read_left!();
77        read_right!();
78        'nondigits: loop {
79            match (l, r) {
80                (Some(l_), Some(r_)) => match (ll, rr) {
81                    (Some(ll_), Some(rr_)) => {
82                        if ll_ == 0 || rr_ == 0 {
83                            // left-aligned matching. (`015` < `12`)
84                            return_unless_equal!(ll_.cmp(&rr_));
85                            'digits_left: loop {
86                                read_left!();
87                                read_right!();
88                                match (ll, rr) {
89                                    (Some(ll_), Some(rr_)) => return_unless_equal!(ll_.cmp(&rr_)),
90                                    (Some(_), None) => return Greater,
91                                    (None, Some(_)) => return Less,
92                                    (None, None) => break 'digits_left,
93                                }
94                            }
95                        } else {
96                            // right-aligned matching. (`15` < `123`)
97                            let mut lastcmp = ll_.cmp(&rr_);
98                            'digits_right: loop {
99                                read_left!();
100                                read_right!();
101                                match (ll, rr) {
102                                    (Some(ll_), Some(rr_)) => {
103                                        // `lastcmp` is only used when there are the same number of
104                                        // digits, so we only update it.
105                                        if lastcmp == Equal {
106                                            lastcmp = ll_.cmp(&rr_);
107                                        }
108                                    }
109                                    (Some(_), None) => return Greater,
110                                    (None, Some(_)) => return Less,
111                                    (None, None) => break 'digits_right,
112                                }
113                            }
114                            return_unless_equal!(lastcmp);
115                        }
116                        continue 'nondigits; // do not read from the iterators again
117                    }
118                    (_, _) => return_unless_equal!(l_.cmp(r_)),
119                },
120                (Some(_), None) => return Greater,
121                (None, Some(_)) => return Less,
122                (None, None) => return Equal,
123            }
124            read_left!();
125            read_right!();
126        }
127    }
128
129    /// Returns the value of the specified row. Returns None if the row does not exist.
130    fn value(&self, row: NonZeroU32) -> Option<&[u8]> {
131        self.as_ref().node(row).map(|v| self.data_file.bytes(v))
132    }
133
134    /// Returns the value of the specified row.
135    unsafe fn value_unchecked(&self, row: NonZeroU32) -> &[u8] {
136        self.data_file.bytes(self.as_ref().node_unchecked(row))
137    }
138
139    /// Returns node and value of the specified row.
140    unsafe fn node_value_unchecked(&self, row: NonZeroU32) -> (&AvltrieeNode<DataAddress>, &[u8]) {
141        let node = self.as_ref().node_unchecked(row);
142        (node, self.data_file.bytes(node))
143    }
144}
145
146impl AvltrieeUpdate<DataAddress, [u8], IdxBinaryAllocator> for IdxBinary {
147    fn convert_on_insert_unique(&mut self, input: &[u8]) -> DataAddress {
148        self.data_file.insert(input).into_address()
149    }
150
151    fn on_delete(&mut self, row: NonZeroU32) {
152        if let Some((true, node)) = self.index.is_unique(row) {
153            self.data_file.delete((**node).clone());
154        }
155    }
156}
157
158impl IdxBinary {
159    /// Opens the file and creates the IdxBinary.
160    /// # Arguments
161    /// * `path` - Path of directory to save data.
162    /// * `allocation_lot` - Extends the specified size when the file size becomes insufficient due to data addition.
163    /// If you expect to add a lot of data, specifying a larger size will improve performance.
164    pub fn new<P: AsRef<Path>>(directory: P, allocation_lot: u32) -> Self {
165        let path = directory.as_ref();
166        Self {
167            index: IdxFile::new(
168                {
169                    let mut path = path.to_path_buf();
170                    path.push(".i");
171                    path
172                },
173                allocation_lot,
174            ),
175            data_file: VariousDataFile::new({
176                let mut path = path.to_path_buf();
177                path.push(".d");
178                path
179            }),
180        }
181    }
182
183    /// Opens the file and creates the IdxBinary.
184    /// # Arguments
185    /// * `path` - Path of part of filename without extension to save data.
186    /// * `allocation_lot` - Extends the specified size when the file size becomes insufficient due to data addition.
187    /// If you expect to add a lot of data, specifying a larger size will improve performance.
188    pub fn new_ext<P: AsRef<Path>>(path: P, allocation_lot: u32) -> Self {
189        let path = path.as_ref();
190        Self {
191            index: IdxFile::new(path.with_extension("i"), allocation_lot),
192            data_file: VariousDataFile::new(path.with_extension("d")),
193        }
194    }
195
196    /// Finds a sequence of bytes, inserts it if it doesn't exist, and returns a row.
197    pub fn row_or_insert(&mut self, content: &[u8]) -> NonZeroU32 {
198        let edge = self.edge(content);
199        if let (Some(row), Ordering::Equal) = edge {
200            row
201        } else {
202            let row = unsafe { NonZeroU32::new_unchecked(self.index.rows_count() + 1) };
203            unsafe {
204                self.index.insert_unique_unchecked(
205                    row,
206                    self.data_file.insert(content).into_address(),
207                    edge,
208                );
209            }
210            row
211        }
212    }
213}