dw_transform/
classify.rs

//! Transforms for classifying (tagging and categorizing) events.
//!
//! Based on code in dw_research: <https://github.com/DeskWatch/dw-research/blob/master/dw_research/classify.py>
4use dw_models::Event;
5use regex::{Regex, RegexBuilder};
6
7pub enum Rule {
8    None,
9    Regex(RegexRule),
10}
11
12impl RuleTrait for Rule {
13    fn matches(&self, event: &Event) -> bool {
14        match self {
15            Rule::None => false,
16            Rule::Regex(rule) => rule.matches(event),
17        }
18    }
19}
20
21trait RuleTrait {
22    fn matches(&self, event: &Event) -> bool;
23}
24
25pub struct RegexRule {
26    regex: Regex,
27}
28
29impl RegexRule {
30    pub fn new(regex_str: &str, ignore_case: bool) -> Result<RegexRule, regex::Error> {
31        let mut regex_builder = RegexBuilder::new(regex_str);
32        regex_builder.case_insensitive(ignore_case);
33        let regex = regex_builder.build()?;
34        Ok(RegexRule { regex })
35    }
36}
37
38/// This struct defines the rules for classification.
39/// For now it just needs to contain the regex to match with, but in the future it might contain a
40/// glob-pattern, or other options for classifying.
41/// It's puropse is to make the API easy to extend in the future without having to break backwards
42/// compatibility (or have to maintain "old" query2 functions).
43impl RuleTrait for RegexRule {
44    fn matches(&self, event: &Event) -> bool {
45        event
46            .data
47            .values()
48            .filter(|val| val.is_string())
49            .any(|val| self.regex.is_match(val.as_str().unwrap()))
50    }
51}
52
53impl From<Regex> for Rule {
54    fn from(re: Regex) -> Self {
55        Rule::Regex(RegexRule { regex: re })
56    }
57}
58
59/// Categorizes a list of events
60///
61/// An event can only have one category, although the category may have a hierarchy,
62/// for instance: "Work -> DeskWatch -> dw-server-rust"
63/// If multiple categories match, the deepest one will be chosen.
64pub fn categorize(mut events: Vec<Event>, rules: &[(Vec<String>, Rule)]) -> Vec<Event> {
65    let mut classified_events = Vec::new();
66    for event in events.drain(..) {
67        classified_events.push(categorize_one(event, rules));
68    }
69    classified_events
70}
71
72fn categorize_one(mut event: Event, rules: &[(Vec<String>, Rule)]) -> Event {
73    let mut category: Vec<String> = vec!["Uncategorized".into()];
74    for (cat, rule) in rules {
75        if rule.matches(&event) {
76            category = _pick_highest_ranking_category(category, &cat);
77        }
78    }
79    event
80        .data
81        .insert("$category".into(), serde_json::json!(category));
82    event
83}
84
85/// Tags a list of events
86///
87/// An event can have many tags (as opposed to only one category) which will be put into the `$tags` key of
88/// the event data object.
89pub fn tag(mut events: Vec<Event>, rules: &[(String, Rule)]) -> Vec<Event> {
90    let mut events_tagged = Vec::new();
91    for event in events.drain(..) {
92        events_tagged.push(tag_one(event, &rules));
93    }
94    events_tagged
95}
96
97fn tag_one(mut event: Event, rules: &[(String, Rule)]) -> Event {
98    let mut tags: Vec<String> = Vec::new();
99    for (cls, rule) in rules {
100        if rule.matches(&event) {
101            tags.push(cls.clone());
102        }
103    }
104    tags.sort_unstable();
105    tags.dedup();
106    event.data.insert("$tags".into(), serde_json::json!(tags));
107    event
108}
109
/// Chooses between the current category `acc` and a candidate category `item`.
///
/// The current category is kept only when it is strictly deeper (has more path
/// segments) than the candidate; on equal or greater depth the candidate is
/// returned as a new `Vec`.
fn _pick_highest_ranking_category(acc: Vec<String>, item: &[String]) -> Vec<String> {
    if acc.len() > item.len() {
        return acc;
    }
    // Candidate is at least as deep as the current choice, so it takes over.
    item.to_vec()
}
118
/// Checks that regex rules match only events containing a matching string value,
/// that `ignore_case` controls case sensitivity, and that `Rule::None` never matches.
#[test]
fn test_rule() {
    let mut e_match = Event::default();
    e_match
        .data
        .insert("test".into(), serde_json::json!("just a test"));

    let mut e_no_match = Event::default();
    e_no_match
        .data
        .insert("nonono".into(), serde_json::json!("no match!"));

    let rule_from_regex = Rule::from(Regex::new("test").unwrap());
    let rule_from_new = Rule::Regex(RegexRule::new("test", false).unwrap());
    let rule_none = Rule::None;
    assert!(rule_from_regex.matches(&e_match));
    assert!(rule_from_new.matches(&e_match));
    assert!(!rule_from_regex.matches(&e_no_match));
    assert!(!rule_from_new.matches(&e_no_match));

    // The `ignore_case` flag decides whether differently-cased text matches.
    let rule_case_insensitive = Rule::Regex(RegexRule::new("TEST", true).unwrap());
    let rule_case_sensitive = Rule::Regex(RegexRule::new("TEST", false).unwrap());
    assert!(rule_case_insensitive.matches(&e_match));
    assert!(!rule_case_sensitive.matches(&e_match));

    assert!(!rule_none.matches(&e_match));
    assert!(!rule_none.matches(&e_no_match));
}
141
/// Checks that the deepest matching category is the one stored in `$category`.
#[test]
fn test_categorize() {
    let regex_rule = |pattern: &str| Rule::from(Regex::new(pattern).unwrap());

    let mut event = Event::default();
    event
        .data
        .insert("test".into(), serde_json::json!("just a test"));

    let rules: Vec<(Vec<String>, Rule)> = vec![
        (vec!["Test".into()], regex_rule(r"test")),
        (vec!["Test".into(), "Subtest".into()], regex_rule(r"test")),
        (vec!["Other".into()], regex_rule(r"nonmatching")),
    ];
    let categorized = categorize(vec![event], &rules);

    assert_eq!(categorized.len(), 1);
    let expected = serde_json::json!(["Test", "Subtest"]);
    assert_eq!(
        categorized.first().unwrap().data.get("$category"),
        Some(&expected)
    );
}
171
/// Checks that an event no rule matches ends up with the "Uncategorized" category.
#[test]
fn test_categorize_uncategorized() {
    let mut event = Event::default();
    event
        .data
        .insert("test".into(), serde_json::json!("just a test"));

    let non_matching = Rule::from(Regex::new(r"not going to match").unwrap());
    let rules: Vec<(Vec<String>, Rule)> =
        vec![(vec!["Non-matching".into(), "test".into()], non_matching)];
    let categorized = categorize(vec![event], &rules);

    assert_eq!(categorized.len(), 1);
    let expected = serde_json::json!(["Uncategorized"]);
    assert_eq!(
        categorized.first().unwrap().data.get("$category"),
        Some(&expected)
    );
}
192
/// Checks that every matching rule contributes its tag and non-matching rules do not.
#[test]
fn test_tag() {
    let regex_rule = |pattern: &str| Rule::from(Regex::new(pattern).unwrap());

    let mut event = Event::default();
    event
        .data
        .insert("test".into(), serde_json::json!("just a test"));

    let rules: Vec<(String, Rule)> = vec![
        ("test".into(), regex_rule(r"test")),
        ("test-2".into(), regex_rule(r"test")),
        ("nomatch".into(), regex_rule(r"nomatch")),
    ];
    let tagged = tag(vec![event], &rules);

    assert_eq!(tagged.len(), 1);

    let expected = serde_json::json!(["test", "test-2"]);
    assert_eq!(tagged.first().unwrap().data.get("$tags"), Some(&expected));
}