1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
use std::{
cmp::Ordering::{self, Equal, Greater, Less},
num::NonZeroU32,
path::Path,
};
pub use idx_file::{
AvltrieeIter, AvltrieeSearch, AvltrieeUpdate, FileMmap, IdxFile, IdxFileAvlTriee,
};
use idx_file::IdxFileAllocator;
use various_data_file::{DataAddress, VariousDataFile};
// Shorthand for the tree type that `IdxBinary` exposes through `AsRef` / `AsMut`.
type IdxBinaryAvltriee = IdxFileAvlTriee<DataAddress, [u8]>;
// Shorthand for the allocator type parameter named in the `AvltrieeSearch` and
// `AvltrieeUpdate` impls for `IdxBinary`.
type IdxBinaryAllocator = IdxFileAllocator<DataAddress>;
/// Index over byte strings that combines an index file with a data file.
///
/// The index works with `DataAddress` values, while the bytes those addresses
/// refer to are kept in `data_file` (`invert` reads them back through
/// `data_file.bytes`).
pub struct IdxBinary {
// Index file parameterised over `DataAddress` entries and `[u8]` keys.
index: IdxFile<DataAddress, [u8]>,
// Storage for the byte content; inserting into it yields the address handed to the index.
data_file: VariousDataFile,
}
// Gives shared access to the index as an `IdxBinaryAvltriee`.
impl AsRef<IdxBinaryAvltriee> for IdxBinary {
fn as_ref(&self) -> &IdxBinaryAvltriee {
// NOTE(review): `index` is declared as an `IdxFile`; the conversion to the tree type
// presumably goes through a `Deref` impl defined in `idx_file` — not visible here.
&self.index
}
}
// Gives exclusive access to the index as an `IdxBinaryAvltriee`.
impl AsMut<IdxBinaryAvltriee> for IdxBinary {
fn as_mut(&mut self) -> &mut IdxBinaryAvltriee {
// NOTE(review): `index` is declared as an `IdxFile`; the conversion to the tree type
// presumably goes through a `DerefMut` impl defined in `idx_file` — not visible here.
&mut self.index
}
}
impl AvltrieeSearch<DataAddress, [u8], IdxBinaryAllocator> for IdxBinary {
    /// Orders two byte strings in "natural" order.
    ///
    /// Both inputs are walked in lockstep, one byte per side per step:
    /// * if at least one of the two current bytes is not an ASCII digit, the
    ///   bytes are compared by value;
    /// * if both are digits and either of them is `0`, the two digit runs are
    ///   compared digit by digit from the left (`015` < `12`);
    /// * otherwise the longer digit run is greater, and runs of equal length
    ///   are decided by their first differing digit (`15` < `123`).
    ///
    /// An input that ends first, with everything before that point equal,
    /// orders first.
    fn cmp(left: &[u8], right: &[u8]) -> Ordering {
        // Numeric value of an ASCII digit; `None` for any other byte or at end of input.
        fn digit(byte: Option<&u8>) -> Option<u8> {
            byte.filter(|b| b.is_ascii_digit()).map(|b| b - b'0')
        }

        // Slice iterators keep returning `None` once exhausted, so they can be
        // polled past the end without an explicit `fuse()`.
        let mut lhs = left.iter();
        let mut rhs = right.iter();
        let mut l_byte = lhs.next();
        let mut r_byte = rhs.next();

        loop {
            // Whichever side runs out first decides the result.
            let (l_raw, r_raw) = match (l_byte, r_byte) {
                (Some(l_raw), Some(r_raw)) => (l_raw, r_raw),
                (Some(_), None) => return Greater,
                (None, Some(_)) => return Less,
                (None, None) => return Equal,
            };

            let (l_lead, r_lead) = match (digit(l_byte), digit(r_byte)) {
                (Some(l_lead), Some(r_lead)) => (l_lead, r_lead),
                _ => {
                    // At least one side is not a digit: plain byte comparison.
                    let order = l_raw.cmp(r_raw);
                    if order != Equal {
                        return order;
                    }
                    l_byte = lhs.next();
                    r_byte = rhs.next();
                    continue;
                }
            };

            if l_lead == 0 || r_lead == 0 {
                // Left-aligned matching (`015` < `12`): the first differing
                // digit decides immediately.
                let lead_order = l_lead.cmp(&r_lead);
                if lead_order != Equal {
                    return lead_order;
                }
                loop {
                    l_byte = lhs.next();
                    r_byte = rhs.next();
                    match (digit(l_byte), digit(r_byte)) {
                        (Some(l_digit), Some(r_digit)) => {
                            let order = l_digit.cmp(&r_digit);
                            if order != Equal {
                                return order;
                            }
                        }
                        (Some(_), None) => return Greater,
                        (None, Some(_)) => return Less,
                        (None, None) => break,
                    }
                }
            } else {
                // Right-aligned matching (`15` < `123`): a longer run always
                // wins, so the first digit difference is only remembered and
                // applied once both runs turn out to have the same length.
                let mut pending = l_lead.cmp(&r_lead);
                loop {
                    l_byte = lhs.next();
                    r_byte = rhs.next();
                    match (digit(l_byte), digit(r_byte)) {
                        (Some(l_digit), Some(r_digit)) => {
                            if pending == Equal {
                                pending = l_digit.cmp(&r_digit);
                            }
                        }
                        (Some(_), None) => return Greater,
                        (None, Some(_)) => return Less,
                        (None, None) => break,
                    }
                }
                if pending != Equal {
                    return pending;
                }
            }
            // Both digit runs ended together. `l_byte` / `r_byte` already hold
            // whatever follows the runs, so loop again without advancing.
        }
    }

    /// Returns the bytes that `data_file` holds for the address `value`.
    fn invert<'a>(&'a self, value: &'a DataAddress) -> &[u8] {
        self.data_file.bytes(value)
    }
}
impl AvltrieeUpdate<DataAddress, [u8], IdxBinaryAllocator> for IdxBinary {
    /// Writes `input` into the data file and returns the address produced by
    /// that insertion.
    fn convert_on_insert_unique(&mut self, input: &[u8]) -> DataAddress {
        let stored = self.data_file.insert(input);
        stored.into_address()
    }

    /// Removes the data-file entry belonging to `row` when the index reports
    /// `(true, node)` for it; in every other case nothing is deleted.
    ///
    /// NOTE(review): the `true` flag presumably means no other row shares this
    /// value — confirm against `IdxFile::is_unique`.
    fn on_delete(&mut self, row: NonZeroU32) {
        if let Some((true, node)) = self.index.is_unique(row) {
            // Copy the address out of the node before handing it to the data file.
            let address = (**node).clone();
            self.data_file.delete(address);
        }
    }
}
impl IdxBinary {
    /// Opens the files and creates the IdxBinary.
    ///
    /// The index is opened at `<directory>/.i` and the data file at `<directory>/.d`.
    /// # Arguments
    /// * `directory` - Path of directory to save data.
    /// * `allocation_lot` - Extends the specified size when the file size becomes insufficient due to data addition.
    ///   If you expect to add a lot of data, specifying a larger size will improve performance.
    pub fn new<P: AsRef<Path>>(directory: P, allocation_lot: u32) -> Self {
        let directory = directory.as_ref();
        Self {
            index: IdxFile::new(directory.join(".i"), allocation_lot),
            data_file: VariousDataFile::new(directory.join(".d")),
        }
    }

    /// Opens the files and creates the IdxBinary.
    ///
    /// The index is opened at `path` with extension `i` and the data file at
    /// `path` with extension `d`.
    /// # Arguments
    /// * `path` - Path of part of filename without extension to save data.
    /// * `allocation_lot` - Extends the specified size when the file size becomes insufficient due to data addition.
    ///   If you expect to add a lot of data, specifying a larger size will improve performance.
    pub fn new_ext<P: AsRef<Path>>(path: P, allocation_lot: u32) -> Self {
        let path = path.as_ref();
        Self {
            index: IdxFile::new(path.with_extension("i"), allocation_lot),
            data_file: VariousDataFile::new(path.with_extension("d")),
        }
    }

    /// Finds a sequence of bytes, inserts it if it doesn't exist, and returns a row.
    ///
    /// # Panics
    /// Panics if the number of the row to create does not fit in a `u32`.
    pub fn row_or_insert(&mut self, content: &[u8]) -> NonZeroU32 {
        let edge = self.edge(content);
        if let (Some(row), Ordering::Equal) = edge {
            return row;
        }

        // Build the new row number with checked arithmetic: an unchecked
        // `rows_count() + 1` that wraps to zero would create an invalid
        // `NonZeroU32`, which is undefined behaviour.
        let row = self
            .index
            .rows_count()
            .checked_add(1)
            .and_then(NonZeroU32::new)
            .expect("row number does not fit in u32");

        let address = self.data_file.insert(content).into_address();
        // SAFETY (assumed — confirm against `insert_unique_unchecked`'s contract):
        // `edge` was computed for `content` just above and did not report an
        // equal entry, and `row` is the row number directly after the current
        // row count.
        unsafe {
            self.index.insert_unique_unchecked(row, address, edge);
        }
        row
    }
}