1use crate::rule::{engine::composition::GraphId, MatchGraph, MatchSentence};
2use crate::utils::regex::Regex;
3use enum_dispatch::enum_dispatch;
4use serde::{Deserialize, Serialize};
5
6#[enum_dispatch]
7#[derive(Debug, Serialize, Deserialize, Clone)]
8pub enum Filter {
9 NoDisambiguationEnglishPartialPosTagFilter,
10}
11
12#[enum_dispatch(Filter)]
13pub trait Filterable {
14 fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
15}
16
17#[derive(Debug, Serialize, Deserialize, Clone)]
18pub struct NoDisambiguationEnglishPartialPosTagFilter {
19 pub(crate) id: GraphId,
20 pub(crate) regexp: Regex,
21 pub(crate) postag_regexp: Regex,
22 #[allow(dead_code)]
23 pub(crate) negate_postag: bool,
24}
25
26impl Filterable for NoDisambiguationEnglishPartialPosTagFilter {
27 fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool {
28 graph.by_id(self.id).tokens(sentence).all(|token| {
29 if let Some(captures) = self.regexp.captures(&token.word().as_str()) {
30 let mut tags = sentence
31 .tagger()
32 .get_tags(&captures.get(1).unwrap().as_str());
33
34 tags.any(|x| self.postag_regexp.is_match(x.pos().as_str()))
35 } else {
36 false
37 }
38 })
39 }
40}