pyrs 0.1.0

Python utils made to go fast 🚗💨
Documentation
extern crate natural;
use pyo3::{prelude::*, wrap_pymodule};

pub mod document {
    use pyo3::prelude::*;

    /// A single term, stored as its text and exposed to Python as a class.
    #[pyclass]
    #[derive(Debug, Clone)]
    pub struct Term {
        /// Text of the term, exactly as passed to the constructor.
        pub text: String,
    }

    #[pymethods]
    impl Term {
        /// Python-side constructor: takes ownership of `text` and wraps it in a `Term`.
        #[new]
        fn new(text: String) -> Self {
            Term { text }
        }
    }

    /// A list of [`Term`]s, exposed to Python as a class.
    #[pyclass]
    pub struct Document {
        /// Terms held by this document, in the order they were supplied.
        pub terms: Vec<Term>,
    }

    #[pymethods]
    impl Document {
        /// Python-side constructor: creates a document that holds no terms.
        #[new]
        fn new() -> Self {
            Document { terms: vec![] }
        }

        /// Static constructor: builds a document that takes ownership of `terms`.
        ///
        /// This never fails; the `PyResult` wrapper always carries `Ok`.
        #[staticmethod]
        fn from_terms(terms: Vec<Term>) -> PyResult<Self> {
            let document = Document { terms };
            Ok(document)
        }
    }

    /// Populates the Python module `document` with the `Document` and `Term` classes.
    ///
    /// Any registration error is propagated to the caller.
    #[pymodule]
    #[pyo3(name = "document")]
    pub fn py_mod_document(_py: Python, m: &PyModule) -> PyResult<()> {
        m.add_class::<Document>()?;
        // The last registration's result doubles as the function's result.
        m.add_class::<Term>()
    }
}

/// A prefix tree whose edges are UTF-16 code units.
///
/// Items are inserted one code unit per level, so items that share a prefix
/// share the nodes for that prefix. Nodes carry no end-of-item marker, which
/// means every prefix of an inserted item is itself reported as contained.
pub mod tree {

    use std::{
        cell::RefCell,
        collections::HashMap,
        hash::{Hash, Hasher},
        rc::Rc,
    };

    /// One node of a [`GenericTree`]: a UTF-16 code unit plus its child nodes.
    ///
    /// Equality and hashing look at `key` only; `children` is ignored.
    /// Cloning a node clones the `Rc` handles, so the clone shares its
    /// children with the original.
    #[derive(Debug, Clone, Eq, Default)]
    pub struct GenericTreeNode {
        /// Code unit this node stands for (the root uses `0`).
        pub key: u16,
        /// Child nodes, indexed by their own `key`.
        pub children: HashMap<u16, Rc<RefCell<GenericTreeNode>>>,
    }

    impl GenericTreeNode {
        /// Creates a node for `key` with no children.
        pub fn new(key: u16) -> Self {
            Self {
                key,
                children: HashMap::new(),
            }
        }

        /// Attaches `other` as a child, replacing any existing child that has
        /// the same key.
        pub fn add_child(&mut self, other: GenericTreeNode) {
            self.children
                .insert(other.key, Rc::new(RefCell::new(other)));
        }

        /// Returns the child stored under `key`, if there is one.
        pub fn get_child(&self, key: &u16) -> Option<&Rc<RefCell<GenericTreeNode>>> {
            self.children.get(key)
        }

        /// Reports whether a child is stored under `key`.
        pub fn contains_child(&self, key: &u16) -> bool {
            self.children.contains_key(key)
        }
    }

    impl std::cmp::PartialEq for GenericTreeNode {
        /// Two nodes are equal when their keys are equal.
        fn eq(&self, other: &Self) -> bool {
            self.key == other.key
        }
    }

    impl Hash for GenericTreeNode {
        /// Hashes the key only, in line with the `PartialEq` implementation.
        fn hash<H: Hasher>(&self, state: &mut H) {
            self.key.hash(state);
        }
    }

    /// Prefix tree over the UTF-16 code units of the inserted strings.
    #[derive(Debug)]
    pub struct GenericTree {
        /// Root node; every stored path starts here.
        pub(crate) root: Rc<RefCell<GenericTreeNode>>,
        /// Cursor used while walking; it points at `root` between operations.
        pub(crate) curr: Rc<RefCell<GenericTreeNode>>,
        /// Number of nodes in the tree, the root included.
        pub(crate) num_nodes: usize,
        /// Starts at 1 and grows by one on every call to [`GenericTree::add`].
        pub(crate) num_items: usize,
    }

    impl Default for GenericTree {
        /// Same as [`GenericTree::new`].
        ///
        /// A derived `Default` would allocate two unrelated nodes for `root`
        /// and `curr`, so the first insertion would land in a node that is
        /// unreachable from the root.
        fn default() -> Self {
            Self::new()
        }
    }

    impl GenericTree {
        /// Creates a tree that consists of the root node only.
        ///
        /// Both counters start at 1.
        pub fn new() -> Self {
            let root = Rc::new(RefCell::new(GenericTreeNode::new(0)));
            let curr = Rc::clone(&root);

            GenericTree {
                root,
                curr,
                num_nodes: 1,
                num_items: 1,
            }
        }

        /// Moves the cursor back to the root.
        fn reset_curr(&mut self) {
            self.curr = Rc::clone(&self.root);
        }

        /// Inserts `item`, creating only the nodes that do not exist yet.
        ///
        /// `num_items` is incremented on every call, including repeated
        /// insertions of the same item.
        pub fn add(&mut self, item: &str) {
            self.build_path(item, 0);
            self.num_items += 1;
        }

        /// Returns the code units of the longest prefix of `item` that can be
        /// followed from the root.
        pub fn get(&mut self, item: &str) -> Vec<u16> {
            let mut path = Vec::new();
            self.traverse(item, &mut path);

            path
        }

        /// Inserts the UTF-16 code units of `item`, starting at code-unit
        /// index `pos`, below the cursor, then resets the cursor to the root.
        ///
        /// A `pos` at or beyond the encoded length inserts nothing.
        pub fn build_path(&mut self, item: &str, pos: usize) {
            for unit in item.encode_utf16().skip(pos) {
                // Clone the handle so the `RefCell` borrow ends with this statement.
                let existing = self.curr.borrow().get_child(&unit).cloned();
                let next = match existing {
                    Some(child) => child,
                    None => {
                        let child = Rc::new(RefCell::new(GenericTreeNode::new(unit)));
                        self.curr
                            .borrow_mut()
                            .children
                            .insert(unit, Rc::clone(&child));
                        self.num_nodes += 1;
                        child
                    }
                };
                self.curr = next;
            }
            self.reset_curr();
        }

        /// Walks from the cursor along the code units of `item`, starting at
        /// code-unit index `path.len()`, and appends every matched unit to
        /// `path`.
        ///
        /// The walk stops at the first unit that has no matching child or at
        /// the end of `item`; the cursor is reset to the root afterwards.
        pub fn traverse(&mut self, item: &str, path: &mut Vec<u16>) {
            for unit in item.encode_utf16().skip(path.len()) {
                let next = self.curr.borrow().get_child(&unit).cloned();
                match next {
                    Some(child) => {
                        self.curr = child;
                        path.push(unit);
                    }
                    None => break,
                }
            }
            self.reset_curr();
        }

        /// Reports whether all of `item` can be followed from the root.
        ///
        /// Nodes do not record where an inserted item ends, so a strict
        /// prefix of an inserted item (the empty string included) also
        /// yields `true`.
        pub fn contains(&mut self, item: &str) -> bool {
            let mut path = Vec::new();
            self.traverse(item, &mut path);
            // `path` is always a prefix of the encoded item, so comparing the
            // two sequences tells us whether the walk reached the end.
            path.iter().copied().eq(item.encode_utf16())
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_can_create_generic_tree_with_initial_properties() {
            let t = GenericTree::new();
            assert_eq!(t.curr, t.root);
            assert!(t.curr.borrow().children.is_empty());
            assert_eq!(t.num_nodes, 1);
            assert_eq!(t.num_items, 1);
        }

        #[test]
        fn test_can_add_query_to_tree_when_query_not_already_in_tree() {
            let mut t = GenericTree::new();
            t.add("foo");

            assert_eq!(t.num_nodes, 4);
            assert_eq!(t.num_items, 2);
        }

        #[test]
        fn test_can_get_query_from_tree_when_query_exists_in_tree() {
            let mut t = GenericTree::new();
            t.add("foo");

            let path = t.get("foo");
            assert_eq!(path, "foo".encode_utf16().collect::<Vec<u16>>());
        }

        #[test]
        fn test_can_add_multiple_queries_to_tree_and_only_marginal_difference_is_added() {
            let mut t = GenericTree::new();

            t.add("foo");
            t.add("bar");
            t.add("foozo");

            assert_eq!(t.num_nodes, 9);
            assert_eq!(t.num_items, 4);
        }

        #[test]
        fn test_contains_returns_indicator_of_query_in_tree() {
            let mut t = GenericTree::new();
            t.add("foo");
            t.add("bar");

            assert!(t.contains("foo"));
            assert!(t.contains("bar"));
            assert!(!t.contains("zoo"));
        }

        #[test]
        fn test_can_add_and_find_non_ascii_queries() {
            let mut t = GenericTree::new();
            t.add("h\u{e9}llo");
            t.add("\u{1F697}");

            assert!(t.contains("h\u{e9}llo"));
            assert!(t.contains("\u{1F697}"));
            assert!(!t.contains("hello"));
            // Five units for the first item, one surrogate pair for the second.
            assert_eq!(t.num_nodes, 8);
        }

        #[test]
        fn test_default_tree_behaves_like_new_tree() {
            let mut t = GenericTree::default();
            assert_eq!(t.num_nodes, 1);
            assert_eq!(t.num_items, 1);

            t.add("foo");
            assert!(t.contains("foo"));
        }
    }
}

pub mod string {
    use natural::distance;
    use pyo3::prelude::*;

    /// Finds the first occurrence of the character `c` in `s`.
    ///
    /// Returns the zero-based position of the match counted in characters
    /// (Unicode scalar values), or `None` when `c` does not occur. This
    /// never fails; the `PyResult` wrapper always carries `Ok`.
    #[pyfunction]
    fn left_find(s: &str, c: char) -> PyResult<Option<usize>> {
        // `str::find` reports a UTF-8 byte offset, which differs from the
        // character position as soon as a multi-byte character precedes the
        // match. Count the characters in front of the match instead; slicing
        // is safe because the offset is always a character boundary.
        Ok(s.find(c).map(|byte_idx| s[..byte_idx].chars().count()))
    }

    /// Finds the last occurrence of the character `c` in `s`.
    ///
    /// Returns the zero-based position of the match counted in characters
    /// (Unicode scalar values), or `None` when `c` does not occur. This
    /// never fails; the `PyResult` wrapper always carries `Ok`.
    #[pyfunction]
    fn right_find(s: &str, c: char) -> PyResult<Option<usize>> {
        // `str::rfind` reports a UTF-8 byte offset, which differs from the
        // character position as soon as a multi-byte character precedes the
        // match. Count the characters in front of the match instead; slicing
        // is safe because the offset is always a character boundary.
        Ok(s.rfind(c).map(|byte_idx| s[..byte_idx].chars().count()))
    }

    /// Python-callable wrapper around `natural::distance::levenshtein_distance`.
    ///
    /// Forwards `s` and `t` unchanged and returns the library's result.
    #[pyfunction]
    fn levenshtein_distance(s: &str, t: &str) -> PyResult<usize> {
        let edits = distance::levenshtein_distance(s, t);
        Ok(edits)
    }

    /// Python-callable wrapper around `natural::distance::jaro_winkler_distance`.
    ///
    /// Forwards `s` and `t` unchanged and returns the library's result.
    #[pyfunction]
    fn jaro_winkler_distance(s: &str, t: &str) -> PyResult<f32> {
        let score = distance::jaro_winkler_distance(s, t);
        Ok(score)
    }

    /// Populates the Python module `string` with this module's functions.
    ///
    /// Registration order: `levenshtein_distance`, `jaro_winkler_distance`,
    /// `left_find`, `right_find`. The first error aborts registration and is
    /// returned to the caller.
    #[pymodule]
    #[pyo3(name = "string")]
    pub fn py_mod_string(_py: Python, m: &PyModule) -> PyResult<()> {
        // Distance metrics.
        m.add_function(wrap_pyfunction!(levenshtein_distance, m)?)?;
        m.add_function(wrap_pyfunction!(jaro_winkler_distance, m)?)?;

        // Character search helpers; the last registration's result is returned.
        m.add_function(wrap_pyfunction!(left_find, m)?)?;
        m.add_function(wrap_pyfunction!(right_find, m)?)
    }
}

/// Entry point of the `pyrs` Python extension module.
///
/// Attaches the `string` and `document` submodules, in that order. The first
/// error aborts initialisation and is returned to the caller.
#[pymodule]
#[pyo3(name = "pyrs")]
fn pyrs(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_wrapped(wrap_pymodule!(string::py_mod_string))?;
    // The last registration's result doubles as the function's result.
    m.add_wrapped(wrap_pymodule!(document::py_mod_document))
}