nlprule 0.6.4

A fast, low-resource Natural Language Processing and Error Correction library.
Documentation
use crate::rule::{engine::composition::GraphId, MatchGraph, MatchSentence};
use crate::utils::regex::Regex;
use enum_dispatch::enum_dispatch;
use serde::{Deserialize, Serialize};

/// The set of filters available in this module.
///
/// The enum is wired to the [`Filterable`] trait through `enum_dispatch`, so a
/// call to `keep` on a `Filter` is forwarded to the wrapped filter type.
#[enum_dispatch]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Filter {
    /// Wraps a [`NoDisambiguationEnglishPartialPosTagFilter`].
    NoDisambiguationEnglishPartialPosTagFilter,
}

/// Common interface of every variant of [`Filter`].
#[enum_dispatch(Filter)]
pub trait Filterable {
    /// Evaluates the filter against `graph`, a match found in `sentence`.
    ///
    /// NOTE(review): judging by the name, `true` presumably means the match is
    /// kept and `false` that it is discarded; confirm against the call sites.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
}

/// Filter that checks a *part* of each token's text against the tagger.
///
/// For every token of the selected group, `regexp` is applied to the token's
/// word, the text of capture group 1 is looked up in the sentence's tagger, and
/// at least one of the returned tags must have a part-of-speech matching
/// `postag_regexp`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct NoDisambiguationEnglishPartialPosTagFilter {
    /// Identifies the group of the match graph whose tokens are checked.
    pub(crate) id: GraphId,
    /// Applied to each token's word; capture group 1 is the text that is tagged.
    pub(crate) regexp: Regex,
    /// Matched against the part-of-speech of each tag found for the captured text.
    pub(crate) postag_regexp: Regex,
    // Not read by `keep` in this file, hence the lint suppression.
    // NOTE(review): presumably meant to invert the `postag_regexp` check and
    // retained so the serialized layout stays stable; confirm before removing.
    #[allow(dead_code)]
    pub(crate) negate_postag: bool,
}

impl Filterable for NoDisambiguationEnglishPartialPosTagFilter {
    /// Returns `true` only if *every* token of the group selected by `self.id`
    /// passes the partial part-of-speech check.
    ///
    /// A token passes when all of the following hold:
    ///
    /// 1. `self.regexp` matches the token's word,
    /// 2. capture group 1 of that match exists and took part in the match,
    /// 3. at least one tag the sentence's tagger returns for the captured text
    ///    has a part-of-speech matched by `self.postag_regexp`.
    ///
    /// A token failing any of these steps makes the whole call return `false`.
    /// In particular, a missing capture group rejects the token instead of
    /// panicking.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool {
        graph.by_id(self.id).tokens(sentence).all(|token| {
            let captures = match self.regexp.captures(token.word().as_str()) {
                Some(captures) => captures,
                None => return false,
            };

            // Group 1 is absent if the configured regex defines no groups, or if
            // the group is optional and did not participate in this match. Treat
            // that like a non-matching token rather than unwrapping.
            let partial = match captures.get(1) {
                Some(partial) => partial,
                None => return false,
            };

            let mut tags = sentence.tagger().get_tags(partial.as_str());

            tags.any(|x| self.postag_regexp.is_match(x.pos().as_str()))
        })
    }
}