nlprule/filter/
mod.rs

1use crate::rule::{engine::composition::GraphId, MatchGraph, MatchSentence};
2use crate::utils::regex::Regex;
3use enum_dispatch::enum_dispatch;
4use serde::{Deserialize, Serialize};
5
6#[enum_dispatch]
7#[derive(Debug, Serialize, Deserialize, Clone)]
8pub enum Filter {
9    NoDisambiguationEnglishPartialPosTagFilter,
10}
11
12#[enum_dispatch(Filter)]
13pub trait Filterable {
14    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
15}
16
17#[derive(Debug, Serialize, Deserialize, Clone)]
18pub struct NoDisambiguationEnglishPartialPosTagFilter {
19    pub(crate) id: GraphId,
20    pub(crate) regexp: Regex,
21    pub(crate) postag_regexp: Regex,
22    #[allow(dead_code)]
23    pub(crate) negate_postag: bool,
24}
25
26impl Filterable for NoDisambiguationEnglishPartialPosTagFilter {
27    fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool {
28        graph.by_id(self.id).tokens(sentence).all(|token| {
29            if let Some(captures) = self.regexp.captures(&token.word().as_str()) {
30                let mut tags = sentence
31                    .tagger()
32                    .get_tags(&captures.get(1).unwrap().as_str());
33
34                tags.any(|x| self.postag_regexp.is_match(x.pos().as_str()))
35            } else {
36                false
37            }
38        })
39    }
40}