//! `obkv` stands for optimized-bytes key and a value store.
//!
//! The main purpose of this library is to store key-value entries
//! where each key is encoded in a small, fixed number of bytes,
//! which allows a lot of optimizations.
//!
//! ## Example: Creating an `obkv` and iterating over the entries
//!
//! ```
//! use obkv::{KvWriterU16, KvReaderU16};
//!
//! let mut writer = KvWriterU16::memory();
//! writer.insert(0, b"hello").unwrap();
//! writer.insert(1, b"blue").unwrap();
//! writer.insert(255, b"world").unwrap();
//! let obkv = writer.into_inner().unwrap();
//!
//! let mut iter = KvReaderU16::new(&obkv).iter();
//! assert_eq!(iter.next(), Some((0, &b"hello"[..])));
//! assert_eq!(iter.next(), Some((1, &b"blue"[..])));
//! assert_eq!(iter.next(), Some((255, &b"world"[..])));
//! assert_eq!(iter.next(), None);
//! assert_eq!(iter.next(), None); // is it fused?
//! ```
#![warn(missing_docs)]
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
mod varint;
use std::convert::{TryFrom, TryInto};
use std::io::{self, Error, ErrorKind::Other};
use std::iter::Fuse;
use std::marker::PhantomData;
use self::varint::{varint_decode32, varint_encode32};
/// An `obkv` writer ([`KvWriter`]) that uses `u8` keys.
pub type KvWriterU8<W> = KvWriter<W, u8>;
/// An `obkv` writer ([`KvWriter`]) that uses `u16` keys.
pub type KvWriterU16<W> = KvWriter<W, u16>;
/// An `obkv` writer ([`KvWriter`]) that uses `u32` keys.
pub type KvWriterU32<W> = KvWriter<W, u32>;
/// An `obkv` writer ([`KvWriter`]) that uses `u64` keys.
pub type KvWriterU64<W> = KvWriter<W, u64>;
/// A reader ([`KvReader`]) that can read `obkv`s with `u8` keys.
pub type KvReaderU8<'a> = KvReader<'a, u8>;
/// A reader ([`KvReader`]) that can read `obkv`s with `u16` keys.
pub type KvReaderU16<'a> = KvReader<'a, u16>;
/// A reader ([`KvReader`]) that can read `obkv`s with `u32` keys.
pub type KvReaderU32<'a> = KvReader<'a, u32>;
/// A reader ([`KvReader`]) that can read `obkv`s with `u64` keys.
pub type KvReaderU64<'a> = KvReader<'a, u64>;
/// An `obkv` database writer.
///
/// Entries are written to the wrapped writer `W` as they are inserted, and the
/// key of the last inserted entry is remembered so that out-of-order or
/// duplicate keys can be rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct KvWriter<W, K> {
/// Key of the most recently inserted entry, `None` until the first insert.
last_key: Option<K>,
/// Destination that receives the encoded entries.
writer: W,
}
impl<K> KvWriter<Vec<u8>, K> {
/// Creates an in memory `KvWriter` that writes the bytes into a `Vec<u8>`.
///
/// ```
/// use obkv::KvWriterU16;
///
/// let mut writer = KvWriterU16::memory();
///
/// writer.insert(0, b"hello").unwrap();
/// writer.insert(1, b"blue").unwrap();
/// writer.insert(255, b"world").unwrap();
///
/// let vec = writer.into_inner().unwrap();
/// ```
pub fn memory() -> KvWriter<Vec<u8>, K> {
KvWriter {
last_key: None,
writer: Vec::new(),
}
}
}
impl<W, K> KvWriter<W, K> {
/// Creates a `KvWriter` that writes the bytes into
/// the given `io::Write` type (e.g. `File`, `Vec<u8>`).
///
/// ```
/// use obkv::KvWriterU16;
///
/// let mut writer = KvWriterU16::new(Vec::new());
///
/// writer.insert(0, b"hello").unwrap();
/// writer.insert(1, b"blue").unwrap();
/// writer.insert(255, b"world").unwrap();
///
/// let vec = writer.into_inner().unwrap();
/// ```
pub fn new(writer: W) -> KvWriter<W, K> {
KvWriter {
last_key: None,
writer,
}
}
}
impl<W: io::Write, K: Key + PartialOrd> KvWriter<W, K> {
/// Insert a key value pair into the database, keys must be
/// inserted in order and must be inserted only one time.
///
/// ```
/// use obkv::KvWriterU16;
///
/// let mut writer = KvWriterU16::new(Vec::new());
///
/// writer.insert(0, b"hello").unwrap();
/// writer.insert(1, b"blue").unwrap();
/// writer.insert(255, b"world").unwrap();
///
/// let vec = writer.into_inner().unwrap();
/// ```
pub fn insert<A: AsRef<[u8]>>(&mut self, key: K, value: A) -> io::Result<()> {
if self.last_key.map_or(false, |last| key <= last) {
return Err(Error::new(
Other,
"keys must be inserted in order and only one time",
));
}
let val = value.as_ref();
let val_len = match val.len().try_into() {
Ok(len) => len,
Err(_) => return Err(Error::new(Other, "value length is bigger than u32 MAX")),
};
let mut buffer = [0; 5];
let len_bytes = varint_encode32(&mut buffer, val_len);
self.writer.write_all(key.to_be_bytes().as_ref())?;
self.writer.write_all(len_bytes)?;
self.writer.write_all(val)?;
self.last_key = Some(key);
Ok(())
}
/// Insert the key value pairs into the database, keys must be
/// inserted in order and must be inserted only one time.
pub fn extend<I, V>(&mut self, iter: I) -> io::Result<()>
where
I: IntoIterator<Item = (K, V)>,
V: AsRef<[u8]>,
{
for (k, v) in iter {
self.insert(k, v)?;
}
Ok(())
}
/// Returns `true` if not entry was written into the writer.
pub fn is_empty(&self) -> bool {
self.last_key.is_none()
}
/// Flushes then extract the internal writer that now contains the keys value entries.
pub fn into_inner(mut self) -> io::Result<W> {
self.writer.flush()?;
Ok(self.writer)
}
/// Flushes the internal writer that now contains the keys value entries.
pub fn finish(self) -> io::Result<()> {
self.into_inner().map(drop)
}
}
/// A reader of `obkv` databases.
///
/// It only borrows the encoded bytes; entries are decoded when iterating,
/// using `K` as the key type.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct KvReader<'a, K> {
/// The raw, encoded `obkv` bytes.
bytes: &'a [u8],
/// Records the key type `K` used to decode `bytes`; no `K` value is stored.
_phantom: PhantomData<K>,
}
impl<'a, K> KvReader<'a, K> {
    /// Builds a reader over a memory area holding an encoded `obkv`.
    ///
    /// ```
    /// use obkv::KvReaderU16;
    ///
    /// let mut iter = KvReaderU16::new(&[]).iter();
    /// assert_eq!(iter.next(), None);
    /// ```
    pub fn new(bytes: &[u8]) -> KvReader<K> {
        KvReader {
            _phantom: PhantomData,
            bytes,
        }
    }

    /// Looks up the value stored under `requested_key`,
    /// returning `None` when the key is not present.
    ///
    /// Entries are scanned from the start and the scan stops at the first
    /// key that is not lower than or equal to the requested one.
    ///
    /// ```
    /// use obkv::{KvWriterU16, KvReaderU16};
    ///
    /// let mut writer = KvWriterU16::memory();
    /// writer.insert(0, b"hello").unwrap();
    /// writer.insert(1, b"blue").unwrap();
    /// writer.insert(255, b"world").unwrap();
    /// let obkv = writer.into_inner().unwrap();
    ///
    /// let reader = KvReaderU16::new(&obkv);
    /// assert_eq!(reader.get(0), Some(&b"hello"[..]));
    /// assert_eq!(reader.get(1), Some(&b"blue"[..]));
    /// assert_eq!(reader.get(10), None);
    /// assert_eq!(reader.get(255), Some(&b"world"[..]));
    /// ```
    pub fn get(&self, requested_key: K) -> Option<&'a [u8]>
    where
        K: Key + PartialOrd,
    {
        for (candidate, value) in self.iter() {
            if candidate <= requested_key {
                if candidate == requested_key {
                    return Some(value);
                }
            } else {
                // Went past the place where the key could be: stop scanning.
                return None;
            }
        }
        None
    }

    /// Returns a fused iterator over all the entries of the key-value store.
    ///
    /// ```
    /// use obkv::{KvWriterU16, KvReaderU16};
    ///
    /// let mut writer = KvWriterU16::memory();
    /// writer.insert(0, b"hello").unwrap();
    /// writer.insert(1, b"blue").unwrap();
    /// writer.insert(255, b"world").unwrap();
    /// let obkv = writer.into_inner().unwrap();
    ///
    /// let mut iter = KvReaderU16::new(&obkv).iter();
    /// assert_eq!(iter.next(), Some((0, &b"hello"[..])));
    /// assert_eq!(iter.next(), Some((1, &b"blue"[..])));
    /// assert_eq!(iter.next(), Some((255, &b"world"[..])));
    /// assert_eq!(iter.next(), None);
    /// assert_eq!(iter.next(), None); // is it fused?
    /// ```
    pub fn iter(&self) -> Fuse<KvIter<'a, K>>
    where
        K: Key,
    {
        // Decoding always starts from the very first byte.
        let entries = KvIter {
            bytes: self.bytes,
            offset: 0,
            _phantom: PhantomData,
        };
        entries.fuse()
    }
}
impl<'a, K: Key> IntoIterator for KvReader<'a, K> {
type Item = (K, &'a [u8]);
type IntoIter = Fuse<KvIter<'a, K>>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// An iterator over a `obkv` database.
///
/// It yields `(key, value)` pairs, decoding one entry per call to `next`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct KvIter<'a, K> {
/// The raw, encoded `obkv` bytes being decoded.
bytes: &'a [u8],
/// Position in `bytes` where the next entry starts.
offset: usize,
/// Records the key type `K` used to decode `bytes`; no `K` value is stored.
_phantom: PhantomData<K>,
}
impl<'a, K: Key> Iterator for KvIter<'a, K> {
    type Item = (K, &'a [u8]);

    /// Decodes the entry that starts at the current offset.
    ///
    /// An entry is made of `K::BYTES_SIZE` big-endian key bytes, followed by
    /// the varint-encoded length of the value, followed by the value bytes.
    ///
    /// Returns `None` when the remaining bytes do not contain a complete
    /// entry (end of data, truncated or malformed input). In that case the
    /// offset is left untouched, so calling `next` again keeps returning
    /// `None` instead of decoding from the middle of a broken entry.
    fn next(&mut self) -> Option<Self::Item> {
        let bytes = self.bytes;

        // All offsets are computed with checked arithmetic: a malformed
        // length must end the iteration, never overflow `usize`.
        let key_end = self.offset.checked_add(K::BYTES_SIZE)?;
        let key = bytes
            .get(self.offset..key_end)
            .and_then(|raw| raw.try_into().ok())
            .map(K::from_be_bytes)?;

        let mut val_len = 0;
        let len_size = varint_decode32(bytes.get(key_end..)?, &mut val_len)?;
        let val_start = key_end.checked_add(len_size)?;
        let val_end = val_start.checked_add(val_len as usize)?;
        let val = bytes.get(val_start..val_end)?;

        // Commit the new position only once the whole entry has been decoded.
        self.offset = val_end;
        Some((key, val))
    }
}
/// A trait that represents a key, this key will be encoded to disk.
///
/// A key is converted to and from a fixed-size array of big-endian bytes;
/// the writer emits those bytes and the iterator reads them back.
pub trait Key: Copy {
/// The number of bytes the `BYTES` array contains,
/// i.e. the size of an encoded key.
const BYTES_SIZE: usize;
/// The array that will contain the bytes of the key.
///
/// It must be viewable as a byte slice (to be written) and
/// constructible from a byte slice (to be read back).
type BYTES: AsRef<[u8]> + for<'a> TryFrom<&'a [u8]>;
/// Returns an array of the key bytes in big-endian.
fn to_be_bytes(&self) -> Self::BYTES;
/// Returns the key that corresponds to the given bytes array.
fn from_be_bytes(array: Self::BYTES) -> Self;
}
// Implements `Key` for each listed primitive integer type by delegating to
// the integer's own inherent big-endian conversion functions.
macro_rules! impl_key {
    ($($int:ty),+) => {
        $(impl Key for $int {
            type BYTES = [u8; Self::BYTES_SIZE];
            // A key is encoded with exactly as many bytes as the integer is wide.
            const BYTES_SIZE: usize = std::mem::size_of::<$int>();

            fn to_be_bytes(&self) -> Self::BYTES {
                <$int>::to_be_bytes(*self)
            }

            fn from_be_bytes(bytes: Self::BYTES) -> Self {
                <$int>::from_be_bytes(bytes)
            }
        })+
    };
}

impl_key!(u8, u16, u32, u64);