matcher_rs 0.1.1

A high-performance, multi-functional word matcher
use std::borrow::Cow;

use fancy_regex::Regex;
use rapidfuzz::distance::levenshtein;

use super::{MatchResultTrait, TextMatcherTrait};

/// Borrowed description of one similarity table, as passed to
/// [`SimMatcher::new`].
///
/// All data is borrowed from the caller; `SimMatcher::new` copies it into
/// owned storage, so a `SimTable` only has to live for the duration of that
/// call.
#[derive(Debug, Clone)]
pub struct SimTable<'a> {
    /// Numeric identifier of the table; reported back on every match
    /// produced from this table's words.
    pub table_id: u32,
    /// String identifier associated with the table; reported back on every
    /// match produced from this table's words.
    pub match_id: &'a str,
    /// Words that input text is compared against.
    pub word_list: &'a Vec<&'a str>,
}

/// Owned counterpart of [`SimTable`], built by `SimMatcher::new` so that the
/// matcher does not keep borrowing from the caller's data.
struct SimProcessedTable {
    /// Copied from `SimTable::table_id`.
    table_id: u32,
    /// Owned copy of `SimTable::match_id`.
    match_id: String,
    /// Owned copies of the words in `SimTable::word_list`, in the same order.
    word_list: Vec<String>,
}

/// One similarity hit produced by `SimMatcher::process`.
#[derive(Debug)]
pub struct SimResult<'a> {
    /// The table word that was similar enough to the input text.
    /// `SimMatcher::process` fills this with a borrow of the matcher's own
    /// stored word.
    pub word: Cow<'a, str>,
    /// Identifier of the table the word belongs to.
    pub table_id: u32,
    /// String identifier of the table the word belongs to, borrowed from the
    /// matcher.
    pub match_id: &'a str,
    /// Normalized Levenshtein similarity between the word and the processed
    /// input text, as returned by `rapidfuzz`.
    pub similarity: f64,
}

impl MatchResultTrait<'_> for SimResult<'_> {
    /// Returns the id of the table that produced this result, converted from
    /// the stored `u32` to `usize`.
    fn table_id(&self) -> usize {
        let id: u32 = self.table_id;
        id as usize
    }

    /// Returns the matched word as a string slice, whether the underlying
    /// `Cow` is borrowed or owned.
    fn word(&self) -> &str {
        &self.word
    }
}

/// Matcher that reports table words whose normalized Levenshtein similarity
/// to the (cleaned) input text reaches a fixed cutoff.
pub struct SimMatcher {
    /// Pattern whose matches are replaced with the empty string in the input
    /// text before it is compared against the table words.
    remove_special_pattern: Regex,
    /// Owned copies of the tables supplied at construction time.
    sim_processed_table_list: Vec<SimProcessedTable>,
}

impl SimMatcher {
    /// Builds a matcher from the given tables.
    ///
    /// Every table's id, match id and words are copied into owned storage,
    /// so the returned matcher does not borrow from `sim_table_list`.
    ///
    /// The parameter is a slice rather than `&Vec<_>` so that arrays and
    /// sub-slices can be passed as well; existing `&Vec<SimTable>` call
    /// sites keep working through deref coercion.
    ///
    /// # Panics
    ///
    /// Only if the constant `\W+` pattern fails to compile, which would be a
    /// bug in this crate rather than a caller error.
    pub fn new(sim_table_list: &[SimTable]) -> SimMatcher {
        // Runs of non-word characters are stripped from input text before it
        // is compared against the table words.
        let remove_special_pattern =
            Regex::new(r"\W+").expect("constant pattern `\\W+` must be a valid regex");

        let sim_processed_table_list = sim_table_list
            .iter()
            .map(|sim_table| SimProcessedTable {
                table_id: sim_table.table_id,
                match_id: sim_table.match_id.to_owned(),
                word_list: sim_table
                    .word_list
                    .iter()
                    .map(|&word| word.to_owned())
                    .collect(),
            })
            .collect();

        SimMatcher {
            remove_special_pattern,
            sim_processed_table_list,
        }
    }
}

/// Scores `candidate` against `processed_text` with normalized Levenshtein
/// similarity, using a score cutoff of `0.8`.
///
/// Returns `Some(similarity)` when the library reports a score for that
/// cutoff and `None` otherwise.
fn similarity_above_cutoff(candidate: &str, processed_text: &str) -> Option<f64> {
    let args = levenshtein::Args::default().score_cutoff(0.8);
    levenshtein::normalized_similarity_with_args(candidate.chars(), processed_text.chars(), &args)
}

impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
    /// Returns `true` as soon as any word of any table is similar enough to
    /// `text`.
    ///
    /// Matches of the matcher's clean-up pattern are removed from `text`
    /// before comparison; the table words are compared as stored.
    fn is_match(&self, text: &str) -> bool {
        let stripped = self.remove_special_pattern.replace_all(text, "");
        let stripped: &str = &stripped;

        self.sim_processed_table_list
            .iter()
            .flat_map(|sim_table| sim_table.word_list.iter())
            .any(|candidate| similarity_above_cutoff(candidate, stripped).is_some())
    }

    /// Collects one [`SimResult`] for every table word that is similar enough
    /// to `text`.
    ///
    /// Results follow table order and, within a table, word order. Each
    /// result borrows its word and match id from the matcher itself, which
    /// is why `self` is borrowed for `'a`.
    fn process(&'a self, text: &str) -> Vec<SimResult<'a>> {
        let stripped = self.remove_special_pattern.replace_all(text, "");
        let stripped: &str = &stripped;

        self.sim_processed_table_list
            .iter()
            .flat_map(|sim_table| {
                // `move` copies the `sim_table` and `stripped` references into
                // the inner closure so it can outlive this outer closure call.
                sim_table.word_list.iter().filter_map(move |candidate| {
                    similarity_above_cutoff(candidate, stripped).map(|similarity| SimResult {
                        word: Cow::Borrowed(candidate.as_str()),
                        table_id: sim_table.table_id,
                        match_id: sim_table.match_id.as_str(),
                        similarity,
                    })
                })
            })
            .collect()
    }
}