strmap 1.0.0

A map using strings or paths as keys
Documentation
use std::convert::AsRef;
use std::fmt;
use std::path::{Path, PathBuf};

use crate::{StrMap, StrMapConfig};

// `Result` whose error type defaults to `fst::Error` when none is given.
type Result<T, E = fst::Error> = std::result::Result<T, E>;
// Result of a single-key insertion; the only failure is `InsertDuplicateError`.
type InsertResult<R> = std::result::Result<R, InsertDuplicateError>;

/// Runs `func` on the byte representation of `path` and returns its result.
///
/// On Unix the bytes handed to `func` are exactly the bytes of the
/// underlying `OsStr`; nothing is copied or re-encoded.
#[cfg(unix)]
pub fn path_to_bytes<F, T>(path: &Path, func: F) -> T
where
    F: FnOnce(&[u8]) -> T,
{
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = path.as_os_str().as_bytes();
    func(raw)
}

/// Builds an owned path from bytes, interpreting them verbatim as an `OsStr`.
///
/// This is the Unix counterpart of `path_to_bytes`: no validation or
/// re-encoding takes place.
#[cfg(unix)]
pub fn bytes_to_path(path: &[u8]) -> PathBuf {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    let borrowed: &Path = OsStr::from_bytes(path).as_ref();
    borrowed.to_owned()
}

/// Runs `func` on a byte encoding of `path` and returns its result.
///
/// On Windows the path's UTF-16 code units are decoded and re-encoded into a
/// temporary buffer as follows:
///
/// * a backslash is written as `/`;
/// * any other character is written as its UTF-8 bytes;
/// * an unpaired surrogate is written as the byte `0xff` followed by the
///   surrogate's two little-endian bytes.
#[cfg(windows)]
pub fn path_to_bytes<F, T>(path: &Path, func: F) -> T
where
    F: FnOnce(&[u8]) -> T,
{
    use std::char::decode_utf16;
    use std::os::windows::ffi::OsStrExt;

    let mut encoded: Vec<u8> = Vec::new();
    // Scratch space large enough for the UTF-8 form of any single `char`.
    let mut scratch = [0u8; 4];

    for unit in decode_utf16(path.as_os_str().encode_wide()) {
        match unit {
            Ok('\\') => encoded.push(b'/'),
            Ok(ch) => encoded.extend_from_slice(ch.encode_utf8(&mut scratch).as_bytes()),
            Err(unpaired) => {
                // 0xff marks an escaped code unit for `bytes_to_path` to undo.
                encoded.push(0xff);
                encoded.extend_from_slice(&unpaired.unpaired_surrogate().to_le_bytes());
            }
        }
    }

    func(&encoded)
}

/// Rebuilds a path from the byte encoding produced by `path_to_bytes`.
///
/// The input is read as runs of UTF-8 text interleaved with three-byte
/// escapes: a `0xff` marker followed by one little-endian UTF-16 code unit.
/// Text runs are re-encoded to UTF-16; escaped code units are copied as-is.
///
/// # Panics
///
/// Panics if a text run is not valid UTF-8, or if a `0xff` marker is not
/// followed by two more bytes.
#[cfg(windows)]
pub fn bytes_to_path(mut path: &[u8]) -> PathBuf {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;

    let mut units: Vec<u16> = Vec::with_capacity(path.len());
    while !path.is_empty() {
        if path[0] == 0xff {
            // Escaped code unit: marker byte plus two little-endian bytes.
            units.push(u16::from_le_bytes([path[1], path[2]]));
            path = &path[3..];
            continue;
        }

        // Plain text extends up to the next marker, or to the end of input.
        let end = path.iter().position(|b| *b == 0xff).unwrap_or(path.len());
        let (text, rest) = path.split_at(end);
        path = rest;
        units.extend(std::str::from_utf8(text).unwrap().encode_utf16());
    }

    PathBuf::from(OsString::from_wide(&units))
}

/// A map from path to T.
///
/// Thin wrapper around a [`StrMap`]: keyed methods convert the path with
/// `path_to_bytes` and forward to the inner map.
pub struct PathMap<T> {
    /// The wrapped map, keyed by the byte encoding of each path.
    inner: StrMap<T>,
}

impl<T> PathMap<T> {
    /// Creates a map backed by `StrMap::empty()`.
    pub fn empty() -> Self {
        let inner = StrMap::empty();
        Self { inner }
    }

    /// Returns the length reported by the inner map.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Inserts `value` under `key`.
    ///
    /// # Errors
    ///
    /// Any error from the inner map's `insert` is reported as an
    /// [`InsertDuplicateError`] carrying `key`.
    pub fn insert(&mut self, key: &Path, value: T) -> InsertResult<()> {
        let outcome = path_to_bytes(key, |bytes| self.inner.insert(bytes, value));
        outcome.map_err(|_| InsertDuplicateError::new(key))
    }

    /// Inserts several entries at once, pairing `keys[i]` with `vals[i]`.
    ///
    /// # Errors
    ///
    /// Fails with an "already exists" error (an [`InsertDuplicateError`]
    /// wrapped in `fst::Error`) if a key is already present in the map or
    /// occurs more than once in `keys`. Keys already present are detected
    /// before duplicates within `keys`. Errors from the inner map's
    /// `insert_many_unchecked` are returned unchanged.
    ///
    /// # Panics
    ///
    /// Panics if `keys` and `vals` differ in length.
    pub fn insert_many<P: AsRef<Path>>(
        &mut self,
        keys: &[P],
        vals: Vec<T>,
        opts: &StrMapConfig,
    ) -> Result<()> {
        let count = keys.len();
        assert_eq!(vals.len(), count);

        // Encoded keys are stored back to back in `flat`;
        // `bounds[i]..bounds[i + 1]` is the byte range of key `i`.
        let mut flat: Vec<u8> = Vec::new();
        let mut bounds = Vec::with_capacity(count + 1);
        bounds.push(0);

        for item in keys {
            let path: &Path = item.as_ref();
            let begin = flat.len();
            path_to_bytes(path, |bytes| flat.extend_from_slice(bytes));
            bounds.push(flat.len());

            if self.inner.has_key(&flat[begin..]) {
                return Err(InsertDuplicateError::new(path).into_fst());
            }
        }

        // Second pass: reject keys repeated within this batch and collect
        // the slices to hand to the inner map.
        let mut seen = std::collections::HashSet::new();
        let mut encoded = Vec::with_capacity(count);
        for (span, original) in bounds.windows(2).zip(keys) {
            let bytes = &flat[span[0]..span[1]];
            if !seen.insert(bytes) {
                return Err(InsertDuplicateError::new(original.as_ref()).into_fst());
            }
            encoded.push(bytes);
        }
        drop(seen);
        drop(bounds);

        self.inner.insert_many_unchecked(&encoded, vals, opts)
    }

    /// Returns the inner map's first entry, with its key converted back to a
    /// path, or `None` if the inner map reports none.
    pub fn first(&self) -> Option<(PathBuf, &T)> {
        let (bytes, value) = self.inner.first()?;
        Some((bytes_to_path(&bytes), value))
    }

    /// Forwards to the inner map's `next` for `curr` and converts the
    /// returned key back to a path.
    ///
    /// NOTE(review): presumably this yields the entry following `curr` in
    /// the inner map's iteration order; confirm against `StrMap::next`.
    pub fn next(&self, curr: &Path) -> Option<(PathBuf, &T)> {
        let (bytes, value) = path_to_bytes(curr, |encoded| self.inner.next(encoded))?;
        Some((bytes_to_path(&bytes), value))
    }

    /// Forwards to the inner map's `should_rebalance`.
    pub fn should_rebalance(&self) -> bool {
        self.inner.should_rebalance()
    }

    /// Forwards to the inner map's `rebalance` with `opts`.
    pub fn rebalance(&mut self, opts: &StrMapConfig) -> Result<()> {
        self.inner.rebalance(opts)
    }

    /// Reports whether the inner map has an entry for `key`.
    pub fn has_key(&self, key: &Path) -> bool {
        path_to_bytes(key, |bytes| self.inner.has_key(bytes))
    }

    /// Returns a shared reference to the value stored under `key`, if any.
    pub fn get(&self, key: &Path) -> Option<&T> {
        path_to_bytes(key, |bytes| self.inner.get(bytes))
    }

    /// Returns a mutable reference to the value stored under `key`, if any.
    pub fn get_mut(&mut self, key: &Path) -> Option<&mut T> {
        path_to_bytes(key, |bytes| self.inner.get_mut(bytes))
    }

    /// Forwards to the inner map's `delete` for `key` and returns its result.
    pub fn delete(&mut self, key: &Path) -> bool {
        path_to_bytes(key, |bytes| self.inner.delete(bytes))
    }
}

/// Error produced when an insertion targets a key that is already taken.
#[derive(Debug)]
pub struct InsertDuplicateError {
    /// The path whose insertion was rejected.
    key: PathBuf,
}

impl InsertDuplicateError {
    /// Creates an error that records an owned copy of `key`.
    fn new(key: &Path) -> Self {
        let key = key.to_owned();
        Self { key }
    }

    /// The path whose insertion was rejected.
    pub fn key(&self) -> &Path {
        self.key.as_path()
    }

    /// Wraps this error in an `io::Error` of kind `AlreadyExists`.
    fn into_io(self) -> std::io::Error {
        use std::io;

        io::Error::new(io::ErrorKind::AlreadyExists, self)
    }

    /// Converts this error to `fst::Error` by way of `io::Error`.
    fn into_fst(self) -> fst::Error {
        let io_err = self.into_io();
        io_err.into()
    }
}

// Empty impl: no trait methods are overridden, so the defaults apply.
impl std::error::Error for InsertDuplicateError {}

impl fmt::Display for InsertDuplicateError {
    /// Writes a message naming the rejected key, rendered via `Path::display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let shown = self.key.display();
        write!(f, "trying to insert at existing key \"{}\"", shown)
    }
}