rapiddb 0.1.16

A reasonably fast configurable embedded key-value sensor database
Documentation
use crate::db::mmav_unit::MMAVUnit;
use crate::errors::MMAVError;

/// Memory Mapped Append-only Vector
///
/// This is a higher abstraction above MMAVUnit, that allows
/// using multiple MMAVUnits, which are loaded and unloaded on demand.
/// This ultimitely allows for a dynamically allocated append-only
/// vector, which uses memory mapped file io.
///
/// ## Examples
/// ```ignore
/// let mut mmav = MMAV::new(".db/test-0");
///
/// let data = b"{\"key\":\"value\"}";
/// mmav.push(data).unwrap_or_default();
/// assert_eq!(mmav.last(), data);
/// ```
#[allow(clippy::upper_case_acronyms)]
pub struct MMAV {
  id: String,
  index: usize,
  mmav_size: usize,
  indices: Vec<usize>,
  mmav_data_start_index: usize,
  unit_map: std::collections::HashMap<usize, MMAVUnit>,
}
impl MMAV {
  /// Memory Mapped Append-only Vector Constructor
  ///
  /// Creates a MMAV with given `id`
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.last(), data);
  /// ```
  pub fn new(id: &str) -> Self {
    let mut index = Default::default();

    let paths = std::fs::read_dir(id).unwrap_or_else(|_| {
      std::fs::create_dir(id).unwrap_or_default();
      std::fs::read_dir(id).unwrap()
    });

    let mut indices = vec![];

    for path in paths {
      path
        .unwrap()
        .file_name()
        .into_string()
        .unwrap_or_default()
        .parse::<usize>()
        .map(|x| indices.push(x))
        .unwrap_or_default();
    }

    indices.sort_unstable();

    if indices.is_empty() {
      indices.push(index);
    }

    let mut unit_map = Default::default();
    let mmav_size = 14_580_008;
    let mmav_data_start_index = 80008;

    if let Some(x) = indices.last() {
      index = *x;
      MMAV::load_unchecked(
        id,
        &mut unit_map,
        *x,
        mmav_size,
        mmav_data_start_index,
      );
    };

    if indices.len() > 1 {
      let x = indices[indices.len() - 2..indices.len() - 1][0];
      MMAV::load_unchecked(
        id,
        &mut unit_map,
        x,
        mmav_size,
        mmav_data_start_index,
      );
    }

    Self {
      index,
      unit_map,
      indices,
      mmav_size,
      mmav_data_start_index,
      id: id.to_owned(),
    }
  }

  /// Computes the closest `index` in `array`
  ///
  /// ## Constraints
  /// `arr` needs to be sorted in ascending order
  ///
  /// ## Examples
  /// ```ignore
  /// assert_eq!(MMAV::bisect_left(5, vec![0, 2, 4, 6, 8, 10]), 4);
  /// ```
  fn bisect_left(index: usize, arr: &[usize]) -> usize {
    if arr.len() == 1 {
      return arr[0];
    }

    let middle = arr.len() / 2;
    let middle_item = arr[middle];

    if index < middle_item {
      return MMAV::bisect_left(index, &arr[..middle]);
    }

    if index > middle_item {
      return MMAV::bisect_left(index, &arr[middle..]);
    }

    index
  }

  /// Load unit with `id`, `size` and `data_start_index` into `unit_map`
  /// with `index` as key, without checking `self.indices`
  ///
  /// ## Examples
  /// ```ignore
  /// let mut unit_map = Default::default();
  /// MMAV::load_unchecked("test-0", &mut unit_map, 0, 4000000, 80008);
  /// ```
  fn load_unchecked(
    id: &str,
    unit_map: &mut std::collections::HashMap<usize, MMAVUnit>,
    index: usize,
    size: usize,
    data_start_index: usize,
  ) -> usize {
    if unit_map.contains_key(&index) {
      return unit_map[&index].len();
    }

    let unit = MMAVUnit::new(&format!("{id}/{index}"), size, data_start_index);
    let result = unit.len();

    unit_map.insert(index, unit);

    result
  }

  /// Load unit that contains `index`
  ///
  /// ## Examples
  /// ```ignore
  /// self.load(0);
  /// ```
  fn load(&mut self, index: usize) -> usize {
    MMAV::load_unchecked(
      &self.id,
      &mut self.unit_map,
      MMAV::bisect_left(index, &self.indices),
      self.mmav_size,
      self.mmav_data_start_index,
    )
  }

  /// Unload all units except the last `keep` number of units
  ///
  /// ## Examples
  /// ```ignore
  /// self.unload(1);
  /// ```
  fn unload(&mut self, keep: usize) {
    if keep > self.indices.len() {
      panic!("index out of range");
    }

    let mut to_remove = vec![];
    for key in self.unit_map.keys() {
      if !self.indices[self.indices.len() - keep..].contains(key) {
        to_remove.push(*key);
      }
    }

    for key in to_remove {
      self.unit_map.remove(&key);
    }
  }

  /// Expand vector
  ///
  /// ## Examples
  /// ```ignore
  /// self.expand();
  /// ```
  fn expand(&mut self) {
    self.unload(1);

    self.index += self.unit_map[&self.index].len();
    self.indices.push(self.index);

    if let Some(x) = self.indices.last() {
      MMAV::load_unchecked(
        &self.id,
        &mut self.unit_map,
        *x,
        self.mmav_size,
        self.mmav_data_start_index,
      );
    }
  }

  /// Push `value` to vector
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.last(), data);
  /// ```
  pub fn push(&mut self, value: &[u8]) {
    self.unit_map.get_mut(&self.index).unwrap().push(value).unwrap_or_else(
      |error| match error {
        MMAVError::ArrayFull => {
          self.expand();
          self.push(value);
        }
        MMAVError::FileFull => {
          self.expand();
          self.push(value);
        }
        _ => (),
      },
    );
  }

  /// Get `value` at `index` immutably
  ///
  /// ## Examples
  /// ```ignore
  /// self._get(0)
  /// ```
  fn _get(&self, index: usize) -> Vec<u8> {
    if self.len() == 0 || index > self.len() - 1 {
      return Default::default();
    }

    let closest = MMAV::bisect_left(index, &self.indices);

    self.unit_map[&closest].get(index - closest).unwrap_or_default()
  }

  /// Get `value` at `index`
  ///
  /// May load data from disk, if it is not in-memory,
  /// as such it is mutable, even though a get normaly is immutable.
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.get(0), data);
  /// ```
  pub fn get(&mut self, index: usize) -> Vec<u8> {
    self.load(index);

    self._get(index)
  }

  /// Get last item in vector
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.last(), data);
  /// ```
  pub fn last(&self) -> Vec<u8> {
    self
      .indices
      .last()
      .map(|x| self.unit_map.get(x).map(|y| y.last()))
      .unwrap_or_default()
      .unwrap_or_default()
  }

  /// Get length of vector
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.len(), 1);
  /// ```
  pub fn len(&self) -> usize {
    return self
      .indices
      .last()
      .map(|x| self.index + self.unit_map[x].len())
      .unwrap_or_default();
  }

  /// Get range from `start` to `end` immutably
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav._range(0, 1), vec![data, data]);
  /// ```
  fn _range(&self, start: usize, end: usize) -> Vec<Vec<u8>> {
    let mut result = vec![];

    if end > self.len() {
      return result;
    }

    for i in start..=end {
      let item = self._get(i);

      if !item.is_empty() {
        result.push(item);
      }
    }

    result
  }

  /// Get range from `start` to `end`
  ///
  /// May load data from disk, if it is not in-memory,
  /// as such it is mutable, even though a range scan normaly is
  /// immutable.
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.range(0, 1), vec![data, data]);
  /// ```
  pub fn range(&mut self, start: usize, end: usize) -> Vec<Vec<u8>> {
    let start_size = self.load(start);
    let end_size = self.load(end);

    let size = (|| {
      if start_size != end_size {
        return start_size + end_size;
      }
      start_size
    })();

    if std::cmp::max(start, end) - std::cmp::min(start, end) > size {
      return self._range(start, start + size - 1);
    }

    self._range(start, end)
  }

  /// Get last `limit` number of items from vector
  ///
  /// May load data from disk, if it is not in-memory,
  /// as such it is mutable, even though a get last item normaly is
  /// immutable.
  ///
  /// ## Examples
  /// ```ignore
  /// let mut mmav = MMAV::new(".db/test-0");
  ///
  /// let data = b"{\"key\":\"value\"}";
  /// mmav.push(data).unwrap_or_default();
  /// mmav.push(data).unwrap_or_default();
  /// assert_eq!(mmav.last_limit(2), vec![data, data]);
  /// ```
  pub fn last_limit(&mut self, limit: usize) -> Vec<Vec<u8>> {
    if self.len() == 0 {
      return Default::default();
    }

    if limit > self.len() {
      return self.range(0, self.len() - 1);
    }

    self.range(self.len() - limit, self.len() - 1)
  }
}