fst 0.1.9

Use finite state transducers to compactly represents sets or maps of many strings (> 1 billion is possible).
Documentation
use raw::{NONE_ADDRESS, CompiledAddr};
use raw::build::BuilderNode;

#[derive(Debug)]
pub struct Registry {
    table: Vec<RegistryCell>,
    table_size: usize, // number of rows
    mru_size: usize, // number of columns
}

#[derive(Debug)]
struct RegistryMru<'a> {
    cells: &'a mut [RegistryCell],
}

#[derive(Clone, Debug)]
pub struct RegistryCell {
    addr: CompiledAddr,
    node: BuilderNode,
}

#[derive(Debug)]
pub enum RegistryEntry<'a> {
    Found(CompiledAddr),
    NotFound(&'a mut RegistryCell),
    Rejected,
}

impl Registry {
    pub fn new(table_size: usize, mru_size: usize) -> Registry {
        let empty_cell = RegistryCell::none();
        let ncells = table_size.checked_mul(mru_size).unwrap();
        Registry {
            table: vec![empty_cell; ncells],
            table_size: table_size,
            mru_size: mru_size,
        }
    }

    pub fn entry<'a>(&'a mut self, node: &BuilderNode) -> RegistryEntry<'a> {
        if self.table.is_empty() {
            return RegistryEntry::Rejected;
        }
        let bucket = self.hash(node);
        let start = self.mru_size * bucket;
        let end = start + self.mru_size;
        RegistryMru { cells: &mut self.table[start..end] }.entry(node)
    }

    fn hash(&self, node: &BuilderNode) -> usize {
        // Basic FNV-1a hash as described:
        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
        //
        // In unscientific experiments, this provides the same compression
        // as `std::hash::SipHasher` but is much much faster.
        const FNV_PRIME: usize = 1099511628211;
        let mut h = 14695981039346656037;
        h = (h ^ (node.is_final as usize)).wrapping_mul(FNV_PRIME);
        h = (h ^ (node.final_output.value() as usize)).wrapping_mul(FNV_PRIME);
        for t in &node.trans {
            h = (h ^ (t.inp as usize)).wrapping_mul(FNV_PRIME);
            h = (h ^ (t.out.value() as usize)).wrapping_mul(FNV_PRIME);
            h = (h ^ (t.addr as usize)).wrapping_mul(FNV_PRIME);
        }
        h % self.table_size
    }
}

impl<'a> RegistryMru<'a> {
    fn entry(mut self, node: &BuilderNode) -> RegistryEntry<'a> {
        if self.cells.len() == 1 {
            let cell = &mut self.cells[0];
            if !cell.is_none() && &cell.node == node {
                RegistryEntry::Found(cell.addr)
            } else {
                cell.node.clone_from(node);
                RegistryEntry::NotFound(cell)
            }
        } else {
            let find = |c: &RegistryCell| !c.is_none() && &c.node == node;
            if let Some(i) = self.cells.iter().position(find) {
                let addr = self.cells[i].addr;
                self.promote(i); // most recently used
                RegistryEntry::Found(addr)
            } else {
                let last = self.cells.len() - 1;
                if self.cells[last].is_none() {
                    self.promote(last);
                }
                self.cells[0].node.clone_from(node); // discard MRU
                RegistryEntry::NotFound(&mut self.cells[0])
            }
        }
    }

    fn promote(&mut self, mut i: usize) {
        assert!(i < self.cells.len());
        while i > 0 {
            self.cells.swap(i-1, i);
            i -= 1;
        }
    }
}

impl RegistryCell {
    fn none() -> RegistryCell {
        RegistryCell {
            addr: NONE_ADDRESS,
            node: BuilderNode::default(),
        }
    }

    fn is_none(&self) -> bool {
        self.addr == NONE_ADDRESS
    }

    pub fn insert(&mut self, addr: CompiledAddr) {
        self.addr = addr;
    }
}

#[cfg(test)]
mod tests {
    use raw::{Output, Transition};
    use raw::build::BuilderNode;
    use super::{
        Registry, RegistryCell, RegistryEntry, RegistryMru,
    };

    fn assert_rejected(entry: RegistryEntry) {
        match entry {
            RegistryEntry::Rejected => {}
            entry => panic!("expected rejected entry, got: {:?}", entry),
        }
    }

    fn assert_not_found(entry: RegistryEntry) {
        match entry {
            RegistryEntry::NotFound(_) => {}
            entry => panic!("expected nout found entry, got: {:?}", entry),
        }
    }

    fn assert_insert_and_found(reg: &mut Registry, bnode: &BuilderNode) {
        match reg.entry(&bnode) {
            RegistryEntry::NotFound(cell) => cell.insert(1234),
            entry => panic!("unexpected not found entry, got: {:?}", entry),
        }
        match reg.entry(&bnode) {
            RegistryEntry::Found(addr) => assert_eq!(addr, 1234),
            entry => panic!("unexpected found entry, got: {:?}", entry),
        }
    }

    #[test]
    fn empty_is_ok() {
        let mut reg = Registry::new(0, 0);
        let bnode = BuilderNode {
            is_final: false,
            final_output: Output::zero(),
            trans: vec![],
        };
        assert_rejected(reg.entry(&bnode));
    }

    #[test]
    fn one_final_is_ok() {
        let mut reg = Registry::new(1, 1);
        let bnode = BuilderNode {
            is_final: true,
            final_output: Output::zero(),
            trans: vec![],
        };
        assert_insert_and_found(&mut reg, &bnode);
    }

    #[test]
    fn one_with_trans_is_ok() {
        let mut reg = Registry::new(1, 1);
        let bnode = BuilderNode {
            is_final: false,
            final_output: Output::zero(),
            trans: vec![Transition {
                addr: 0, inp: b'a', out: Output::zero(),
            }],
        };
        assert_insert_and_found(&mut reg, &bnode);
        assert_not_found(
            reg.entry(&BuilderNode { is_final: true, .. bnode.clone() }));
        assert_not_found(reg.entry(&BuilderNode {
            trans: vec![Transition {
                addr: 0, inp: b'b', out: Output::zero(),
            }],
            .. bnode.clone()
        }));
        assert_not_found(reg.entry(&BuilderNode {
            trans: vec![Transition {
                addr: 0, inp: b'a', out: Output::new(1),
            }],
            .. bnode.clone()
        }));
    }

    #[test]
    fn mru_works() {
        let mut reg = Registry::new(1, 1);

        let bnode1 = BuilderNode { is_final: true, ..BuilderNode::default() };
        assert_insert_and_found(&mut reg, &bnode1);

        let bnode2 = BuilderNode {
            final_output: Output::new(1), ..bnode1.clone()
        };
        assert_insert_and_found(&mut reg, &bnode2);
        assert_not_found(reg.entry(&bnode1));
    }

    #[test]
    fn promote() {
        let bn = BuilderNode::default();
        let mut bnodes = vec![
            RegistryCell { addr: 1, node: bn.clone() },
            RegistryCell { addr: 2, node: bn.clone() },
            RegistryCell { addr: 3, node: bn.clone() },
            RegistryCell { addr: 4, node: bn.clone() },
        ];
        let mut mru = RegistryMru { cells: &mut bnodes };

        mru.promote(0);
        assert_eq!(mru.cells[0].addr, 1);
        assert_eq!(mru.cells[1].addr, 2);
        assert_eq!(mru.cells[2].addr, 3);
        assert_eq!(mru.cells[3].addr, 4);

        mru.promote(1);
        assert_eq!(mru.cells[0].addr, 2);
        assert_eq!(mru.cells[1].addr, 1);
        assert_eq!(mru.cells[2].addr, 3);
        assert_eq!(mru.cells[3].addr, 4);

        mru.promote(3);
        assert_eq!(mru.cells[0].addr, 4);
        assert_eq!(mru.cells[1].addr, 2);
        assert_eq!(mru.cells[2].addr, 1);
        assert_eq!(mru.cells[3].addr, 3);

        mru.promote(2);
        assert_eq!(mru.cells[0].addr, 1);
        assert_eq!(mru.cells[1].addr, 4);
        assert_eq!(mru.cells[2].addr, 2);
        assert_eq!(mru.cells[3].addr, 3);
    }
}