strmap 1.0.0

A map using strings or paths as keys
Documentation
use std::fmt;
use std::ops::Deref;

use crate::piece::MapPiece;
use crate::StrMapConfig;

type Result<T, E = fst::Error> = std::result::Result<T, E>;
type InsertResult<R> = std::result::Result<R, InsertDuplicateError>;

/// A map from string to T.
pub struct StrMap<T> {
    // Pieces sorted by capacity.
    //
    // We don't allow multiple pieces with the same value of
    // `piece.capacity().next_power_of_two()`
    pieces: Vec<MapPiece<T>>,
    mini_piece: Vec<(Box<[u8]>, T)>,
    should_rebalance: bool,
}

impl<T> StrMap<T> {
    pub fn empty() -> Self {
        Self {
            pieces: Vec::new(),
            mini_piece: Vec::with_capacity(128),
            should_rebalance: false,
        }
    }

    pub fn build(keys: &[&[u8]], vals: Vec<T>, opts: &StrMapConfig) -> Result<Self> {
        let piece = MapPiece::build(keys, vals, opts)?;

        Ok(Self {
            pieces: vec![piece],
            mini_piece: Vec::with_capacity(128),
            should_rebalance: false,
        })
    }

    pub fn len(&self) -> usize {
        let mut len = self.mini_piece.len();
        for piece in &self.pieces {
            len += piece.len();
        }
        len
    }

    pub fn insert(&mut self, key: &[u8], value: T) -> InsertResult<()> {
        if self.has_key(key) {
            Err(InsertDuplicateError::new(key))
        } else {
            self.mini_piece
                .push((key.to_vec().into_boxed_slice(), value));
            if self.mini_piece.len() >= 1024 {
                self.should_rebalance = true;
            }
            Ok(())
        }
    }

    pub fn insert_many(&mut self, keys: &[&[u8]], vals: Vec<T>, opts: &StrMapConfig) -> Result<()> {
        let mut keys_dupe = std::collections::HashSet::new();

        for key in keys {
            if self.has_key(key) || !keys_dupe.insert(key) {
                return Err(InsertDuplicateError::new(key).into_fst());
            }
        }

        drop(keys_dupe);
        self.insert_many_unchecked(keys, vals, opts)
    }

    /// Assumes that `keys` have been checked for duplicates.
    pub(crate) fn insert_many_unchecked(
        &mut self,
        keys: &[&[u8]],
        vals: Vec<T>,
        opts: &StrMapConfig,
    ) -> Result<()> {
        assert_eq!(keys.len(), vals.len());

        if keys.len() < 128 {
            if self.mini_piece.len() >= 2048 {
                let len = keys.len() + self.mini_piece.len();
                let mut keys2: Vec<&[u8]> = Vec::with_capacity(len);
                keys2.extend_from_slice(keys);

                let mut vals2 = vals;
                let mut keys3 = Vec::with_capacity(self.mini_piece.len());
                for (key, val) in self.mini_piece.drain(..) {
                    keys3.push(key);
                    vals2.push(val);
                }
                keys2.extend(keys3.iter().map(|slice| &**slice));

                return self.insert_many_unchecked(&keys2, vals2, opts);
            } else {
                for (key, val) in keys.iter().copied().zip(vals) {
                    self.mini_piece.push((key.to_vec().into_boxed_slice(), val));
                }
                if self.mini_piece.len() >= 1024 {
                    self.should_rebalance = true;
                }
                return Ok(());
            }
        }

        let piece = MapPiece::build(keys, vals, opts)?;

        let p2 = piece.capacity().next_power_of_two();
        for i in 0..self.pieces.len() {
            if self.pieces[i].capacity() == p2 {
                self.should_rebalance = true;
                break;
            }
        }
        self.pieces.push(piece);
        self.pieces.sort_unstable_by_key(|p| p.capacity());
        Ok(())
    }

    pub fn first(&self) -> Option<(Vec<u8>, &T)> {
        if let Some(val) = self.get(b"") {
            return Some((Vec::new(), val));
        }

        self.next(b"")
    }

    pub fn next(&self, curr: &[u8]) -> Option<(Vec<u8>, &T)> {
        fn update_vec(vec: &mut Vec<u8>, new_data: &[u8]) {
            vec.clear();
            vec.extend_from_slice(new_data);
        }

        let mut next: Option<(Vec<u8>, &T)> = None;

        for (key, val) in &self.mini_piece {
            if curr < key {
                match next.as_mut() {
                    Some((next, nval)) => {
                        if &**key < &**next {
                            update_vec(next, key);
                            *nval = val;
                        }
                    }
                    None => {
                        next = Some((key.to_vec(), val));
                    }
                }
            }
        }

        for piece in &self.pieces {
            piece.with_next(curr, |key, val| match next.as_mut() {
                Some((next, nval)) => {
                    if key < &**next {
                        update_vec(next, key);
                        *nval = val;
                    }
                }
                None => {
                    next = Some((key.to_vec(), val));
                }
            });
        }

        next
    }

    pub fn should_rebalance(&self) -> bool {
        self.should_rebalance
    }

    pub fn rebalance(&mut self, opts: &StrMapConfig) -> Result<()> {
        self.should_rebalance = false;
        if self.mini_piece.len() > 64 {
            let mut keys = Vec::with_capacity(self.mini_piece.len());
            let mut vals = Vec::with_capacity(self.mini_piece.len());
            for (key, val) in self.mini_piece.drain(..) {
                keys.push(key);
                vals.push(val);
            }
            let key_slices: Vec<&[u8]> = keys.iter().map(|k| k.deref()).collect();

            let new_piece = MapPiece::build(&key_slices, vals, opts)?;
            self.pieces.insert(0, new_piece);
        }

        // If any pieces are too empty, rebuild them.
        for piece in self.pieces.iter_mut() {
            if 10 * piece.len() < 8 * piece.capacity() {
                let mut piece_builder = crate::piece::Builder::new(opts)?;
                let mut drainer = piece.drain();
                while let Some((key, val)) = drainer.next() {
                    piece_builder.insert(key, val)?;
                }
                *piece = piece_builder.finish()?;
            }
        }

        // Merge pieces in the same size class.
        'outer: loop {
            self.pieces.sort_unstable_by_key(|p| p.capacity());

            for i in 1..self.pieces.len() {
                let p1 = &self.pieces[i - 1];
                let p2 = &self.pieces[i];
                if p1.capacity().next_power_of_two() == p2.capacity().next_power_of_two() {
                    let p1 = self.pieces.swap_remove(i);
                    let p2 = self.pieces.swap_remove(i - 1);
                    self.pieces.push(crate::piece::merge_pieces(p1, p2, opts)?);

                    continue 'outer;
                }
            }

            return Ok(());
        }
    }

    pub fn has_key(&self, key: &[u8]) -> bool {
        for entry in &self.mini_piece {
            if entry.0.deref() == key {
                return true;
            }
        }

        for piece in &self.pieces {
            if piece.has_key(key) {
                return true;
            }
        }

        false
    }

    pub fn get(&self, key: &[u8]) -> Option<&T> {
        for entry in &self.mini_piece {
            if entry.0.deref() == key {
                return Some(&entry.1);
            }
        }

        for piece in &self.pieces {
            let res = piece.get(key);
            if res.is_some() {
                return res;
            }
        }

        None
    }

    pub fn get_mut(&mut self, key: &[u8]) -> Option<&mut T> {
        for entry in &mut self.mini_piece {
            if entry.0.deref() == key {
                return Some(&mut entry.1);
            }
        }

        for piece in &mut self.pieces {
            let res = piece.get_mut(key);
            if res.is_some() {
                return res;
            }
        }

        None
    }

    pub fn delete(&mut self, key: &[u8]) -> bool {
        for (i, entry) in self.mini_piece.iter().enumerate() {
            if entry.0.deref() == key {
                self.mini_piece.swap_remove(i);
                return true;
            }
        }

        for piece in &mut self.pieces {
            if piece.delete(key) {
                if 10 * piece.len() < 8 * piece.capacity() {
                    self.should_rebalance = true;
                }

                return true;
            }
        }

        false
    }
}

#[derive(Debug)]
pub struct InsertDuplicateError {
    key: Box<[u8]>,
}

impl InsertDuplicateError {
    fn new(key: &[u8]) -> Self {
        Self {
            key: key.to_vec().into_boxed_slice(),
        }
    }

    pub fn key(&self) -> &[u8] {
        &self.key
    }

    fn into_io(self) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::AlreadyExists, self)
    }

    fn into_fst(self) -> fst::Error {
        self.into_io().into()
    }
}

impl std::error::Error for InsertDuplicateError {}

impl fmt::Display for InsertDuplicateError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let key = String::from_utf8_lossy(&self.key);
        write!(f, "trying to insert at existing key \"{}\"", key)
    }
}