1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
use crate::rule::{engine::composition::GraphId, MatchGraph, MatchSentence};
use crate::utils::regex::Regex;
use enum_dispatch::enum_dispatch;
use serde::{Deserialize, Serialize};

/// Closed set of the filter implementations declared in this module.
///
/// The enum is annotated with `#[enum_dispatch]` and [`Filterable`] with
/// `#[enum_dispatch(Filter)]`, so calling a [`Filterable`] method on a
/// `Filter` is forwarded to the wrapped variant.
#[enum_dispatch]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum Filter {
    NoDisambiguationEnglishPartialPosTagFilter,
}

/// Interface shared by every [`Filter`] variant.
#[enum_dispatch(Filter)]
pub trait Filterable {
    /// Evaluates this filter for the match described by `graph` within `sentence`.
    ///
    /// NOTE(review): presumably `true` means the match is retained and `false`
    /// means it is discarded; confirm against the caller.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
}

/// Filter that looks up the part-of-speech tags of a *part* of each token's
/// word text (the part selected by capture group 1 of `regexp`) and checks
/// them against `postag_regexp`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct NoDisambiguationEnglishPartialPosTagFilter {
    /// Identifies the part of the match graph whose tokens are checked.
    pub(crate) id: GraphId,
    /// Applied to each token's word text; capture group 1 is the string
    /// handed to the tagger.
    pub(crate) regexp: Regex,
    /// Tested against the `pos` of every tag the tagger returns for the
    /// captured string.
    pub(crate) postag_regexp: Regex,
    // The `keep` implementation in this file never reads this flag, hence the
    // lint suppression.
    // NOTE(review): presumably meant to invert the POS-tag test; confirm
    // whether it should be honored.
    #[allow(dead_code)]
    pub(crate) negate_postag: bool,
}

impl Filterable for NoDisambiguationEnglishPartialPosTagFilter {
    /// Returns `true` only if every token of the graph part selected by
    /// `self.id` passes the partial POS-tag check.
    ///
    /// A token passes when `regexp` matches its word text and at least one of
    /// the tags the sentence's tagger returns for capture group 1 has a `pos`
    /// matched by `postag_regexp`. A token whose word text is not matched by
    /// `regexp` fails the check.
    ///
    /// # Panics
    ///
    /// Panics if `regexp` matches but capture group 1 is absent from the
    /// match, since the group is unwrapped unconditionally.
    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool {
        graph.by_id(self.id).tokens(sentence).all(|token| {
            match self.regexp.captures(&token.word().text.as_ref()) {
                // The word text does not have the expected shape at all.
                None => false,
                Some(found) => {
                    // Only the captured part of the word is looked up.
                    let partial_word = found.get(1).unwrap();
                    let candidates = sentence.tagger().get_tags(&partial_word.as_str());

                    candidates
                        .iter()
                        .any(|tag| self.postag_regexp.is_match(tag.pos.as_ref()))
                }
            }
        })
    }
}