// yass-2 1.1.1
//
// Yet another string similarity library
// Documentation
use pyo3::prelude::*;

use crate::error::into_pyerr;

use super::{
    CharacterTokenizer, HybridJaccard, Jaro, JaroWinkler, Levenshtein, MongeElkan, StrSim,
    SymmetricMongeElkan, Tokenizer, WhitespaceCharSeqTokenizer,
};

/// Python-facing newtype around [`WhitespaceCharSeqTokenizer`].
///
/// Registered with PyO3 under the Python name `WhitespaceCharSeqTokenizer`
/// in module `strsim`; the wrapped tokenizer is the only field.
#[pyclass(module = "strsim", name = "WhitespaceCharSeqTokenizer")]
pub struct PyWhitespaceCharSeqTokenizer(WhitespaceCharSeqTokenizer);

/// Python-facing newtype around [`CharacterTokenizer`].
///
/// Registered with PyO3 under the Python name `CharacterTokenizer` in module
/// `strsim`; the wrapped tokenizer is the only field.
#[pyclass(module = "strsim", name = "CharacterTokenizer")]
pub struct PyCharacterTokenizer(CharacterTokenizer);

/// Python class (module `strsim`) that owns a `Vec<Vec<char>>`.
///
/// In this file it is produced by the `tokenize` / `unique_tokenize` methods
/// of [`PyWhitespaceCharSeqTokenizer`] and consumed by the similarity
/// functions that take pre-tokenized sequences of character sequences.
#[pyclass(module = "strsim")]
pub struct VecVecChar(Vec<Vec<char>>);

/// Python class (module `strsim`) that owns a `Vec<char>`.
///
/// In this file it is produced by the `tokenize` / `unique_tokenize` methods
/// of [`PyCharacterTokenizer`] and consumed by the character-level similarity
/// functions (Levenshtein, Jaro, Jaro-Winkler).
#[pyclass(module = "strsim")]
pub struct VecChar(Vec<char>);

#[pymethods]
impl PyWhitespaceCharSeqTokenizer {
    #[new]
    fn new() -> Self {
        PyWhitespaceCharSeqTokenizer(WhitespaceCharSeqTokenizer {})
    }

    fn tokenize(&mut self, s: &str) -> VecVecChar {
        VecVecChar(self.0.tokenize(s))
    }

    fn unique_tokenize(&mut self, s: &str) -> VecVecChar {
        VecVecChar(self.0.unique_tokenize(s))
    }
}

#[pymethods]
impl PyCharacterTokenizer {
    #[new]
    fn new() -> Self {
        PyCharacterTokenizer(CharacterTokenizer {})
    }

    fn tokenize(&mut self, s: &str) -> VecChar {
        VecChar(self.0.tokenize(s))
    }

    fn unique_tokenize(&mut self, s: &str) -> VecChar {
        VecChar(self.0.unique_tokenize(s))
    }
}

#[pyfunction]
pub fn hybrid_jaccard_similarity(key: &VecVecChar, query: &VecVecChar) -> PyResult<f64> {
    HybridJaccard::default()
        .similarity_pre_tok2(&key.0, &query.0)
        .map_err(into_pyerr)
}

#[pyfunction]
pub fn levenshtein_similarity(key: &VecChar, query: &VecChar) -> PyResult<f64> {
    Levenshtein::default()
        .similarity_pre_tok2(&key.0, &query.0)
        .map_err(into_pyerr)
}

#[pyfunction]
#[pyo3(name = "jaro_similarity")]
pub fn jaro_similarity(key: &VecChar, query: &VecChar) -> PyResult<f64> {
    (Jaro {})
        .similarity_pre_tok2(&key.0, &query.0)
        .map_err(into_pyerr)
}

#[pyfunction(name = "jaro_winkler_similarity")]
#[pyo3(signature = (key, query, threshold = 0.7, scaling_factor = 0.1, prefix_len = 4))]
pub fn jaro_winkler_similarity(
    key: &VecChar,
    query: &VecChar,
    threshold: f64,
    scaling_factor: f64,
    prefix_len: usize,
) -> PyResult<f64> {
    (JaroWinkler {
        threshold,
        scaling_factor,
        prefix_len,
    })
    .similarity_pre_tok2(&key.0, &query.0)
    .map_err(into_pyerr)
}

#[pyfunction]
pub fn monge_elkan_similarity(key: &VecVecChar, query: &VecVecChar) -> PyResult<f64> {
    MongeElkan::default()
        .similarity_pre_tok2(&key.0, &query.0)
        .map_err(into_pyerr)
}

#[pyfunction]
pub fn symmetric_monge_elkan_similarity(key: &VecVecChar, query: &VecVecChar) -> PyResult<f64> {
    SymmetricMongeElkan(MongeElkan::default())
        .similarity_pre_tok2(&key.0, &query.0)
        .map_err(into_pyerr)
}