1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
use crate::rule::{engine::composition::GraphId, MatchGraph, MatchSentence};
use crate::utils::regex::Regex;
use enum_dispatch::enum_dispatch;
use serde::{Deserialize, Serialize};

/// The set of available filters.
///
/// Every variant implements [`Filterable`]; the `enum_dispatch` attribute
/// forwards [`Filterable::keep`] calls on the enum to the contained variant.
#[enum_dispatch]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Filter {
    NoDisambiguationEnglishPartialPosTagFilter,
}

/// Common interface of all [`Filter`] variants.
#[enum_dispatch(Filter)]
pub trait Filterable {
    /// Returns `true` if the match described by `graph` within `sentence`
    /// passes this filter, `false` otherwise.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
}

/// Filter that checks the part-of-speech tags of a *part* of each token.
///
/// For every token of the group `id`, `regexp` is applied to the token text,
/// the text of capture group 1 is looked up in the sentence's tagger, and at
/// least one of the resulting tags has to match `postag_regexp`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct NoDisambiguationEnglishPartialPosTagFilter {
    /// Group of the match graph whose tokens are inspected.
    pub(crate) id: GraphId,
    /// Applied to the token text; capture group 1 selects the part to tag.
    pub(crate) regexp: Regex,
    /// Has to match the POS tag of at least one tagging of the captured part.
    pub(crate) postag_regexp: Regex,
    // `keep` does not consult this flag (negation is not implemented), so the
    // field is only carried through (de)serialization. That is why the
    // dead-code lint is silenced here.
    #[allow(dead_code)]
    pub(crate) negate_postag: bool,
}

impl Filterable for NoDisambiguationEnglishPartialPosTagFilter {
    /// Returns `true` if *every* token of the group `self.id` passes the
    /// partial POS tag check.
    ///
    /// A token passes if `regexp` matches its text, capture group 1 took part
    /// in the match, and at least one tag the tagger returns for the captured
    /// text has a POS matching `postag_regexp`.
    ///
    /// A token whose text matches `regexp` without capture group 1 (missing or
    /// non-participating group) is rejected instead of causing a panic.
    ///
    /// Note that, as with any use of `Iterator::all`, a group without tokens
    /// yields `true`.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool {
        graph.by_id(self.id).tokens(sentence).all(|token| {
            let captures = match self.regexp.captures(&token.word().text.as_ref()) {
                Some(captures) => captures,
                // The token text does not have the expected shape.
                None => return false,
            };

            // There is nothing to tag if group 1 is absent; treat this like a
            // non-matching token rather than unwrapping and panicking.
            let partial = match captures.get(1) {
                Some(partial) => partial,
                None => return false,
            };

            let tags = sentence.tagger().get_tags(&partial.as_str());

            tags.iter()
                .any(|tag| self.postag_regexp.is_match(tag.pos.as_ref()))
        })
    }
}