Skip to main content

doing_ops/
autotag.rs

1use std::{collections::HashMap, sync::Mutex};
2
3use doing_config::AutotagConfig;
4use doing_taskpaper::{Entry, Tag};
5use regex::Regex;
6
/// Process-wide cache of compiled synonym regexes, keyed by the raw wildcard pattern string.
///
/// Starts out as `None`; the map is created lazily the first time `cached_synonym_regex` runs.
static SYNONYM_REGEX_CACHE: Mutex<Option<HashMap<String, Regex>>> = Mutex::new(None);
8
9/// Apply automatic tagging to an entry based on configuration rules.
10///
11/// Processing order:
12/// 1. Default tags from config are added.
13/// 2. Whitelist: words in the title matching whitelist entries become tags.
14/// 3. Synonyms: words matching synonym patterns add the parent tag.
15/// 4. Transform: regex patterns on existing tags generate new tags.
16///
17/// Autotagging is idempotent — applying it twice does not duplicate tags.
18pub fn autotag(entry: &mut Entry, config: &AutotagConfig, default_tags: &[String]) {
19  apply_default_tags(entry, default_tags);
20  let title_lower = entry.title().to_lowercase();
21  let words: Vec<&str> = title_lower.split_whitespace().collect();
22  apply_mappings(entry, &config.mappings, &words);
23  apply_whitelist(entry, &config.whitelist, &words);
24  apply_synonyms(entry, &config.synonyms, &words);
25  apply_transforms(entry, &config.transform);
26  entry.tags_mut().dedup();
27}
28
29fn apply_default_tags(entry: &mut Entry, default_tags: &[String]) {
30  for tag_name in default_tags {
31    if !entry.tags().has(tag_name) {
32      entry.tags_mut().add(Tag::new(tag_name, None::<String>));
33    }
34  }
35}
36
37/// Apply Ruby-style key-value mappings: if word appears in title, add the mapped tag.
38fn apply_mappings(entry: &mut Entry, mappings: &HashMap<String, String>, words: &[&str]) {
39  for (word, tag_name) in mappings {
40    if entry.tags().has(tag_name) {
41      continue;
42    }
43    let target = word.to_lowercase();
44    if words.iter().any(|w| *w == target) {
45      entry.tags_mut().add(Tag::new(tag_name, None::<String>));
46    }
47  }
48}
49
50fn apply_synonyms(entry: &mut Entry, synonyms: &HashMap<String, Vec<String>>, words: &[&str]) {
51  for (tag_name, synonym_patterns) in synonyms {
52    if entry.tags().has(tag_name) {
53      continue;
54    }
55
56    for pattern in synonym_patterns {
57      if let Some(rx) = cached_synonym_regex(pattern)
58        && words.iter().any(|w| rx.is_match(w))
59      {
60        entry.tags_mut().add(Tag::new(tag_name, None::<String>));
61        break;
62      }
63    }
64  }
65}
66
67fn apply_transform(entry: &mut Entry, rule: &str) {
68  let (pattern_str, raw_replacement) = match parse_transform_rule(rule) {
69    Some(parts) => parts,
70    None => return,
71  };
72
73  let (replacement, replace_original) = parse_transform_flags(raw_replacement);
74
75  let pattern = if pattern_str.starts_with('@') {
76    pattern_str.to_string()
77  } else {
78    format!("@{}", pattern_str)
79  };
80
81  let regex = match Regex::new(&format!("(?i)^{}$", pattern)) {
82    Ok(rx) => rx,
83    Err(_) => return,
84  };
85
86  let tag_names: Vec<String> = entry.tags().iter().map(|t| t.name().to_string()).collect();
87
88  for tag_name in &tag_names {
89    let tag_str = format!("@{}", tag_name);
90    let Some(caps) = regex.captures(&tag_str) else {
91      continue;
92    };
93
94    let mut result = replacement.clone();
95    for (i, cap) in caps.iter().enumerate().skip(1) {
96      if let Some(m) = cap {
97        result = result.replace(&format!("${}", i), m.as_str());
98      }
99    }
100
101    let new_tag_names: Vec<String> = result
102      .split_whitespace()
103      .map(|t| t.strip_prefix('@').unwrap_or(t).to_string())
104      .filter(|t| !t.is_empty())
105      .collect();
106
107    if replace_original {
108      entry.tags_mut().remove(tag_name);
109    }
110
111    for new_name in &new_tag_names {
112      if !entry.tags().has(new_name) {
113        entry.tags_mut().add(Tag::new(new_name, None::<String>));
114      }
115    }
116
117    break;
118  }
119}
120
121fn apply_transforms(entry: &mut Entry, transforms: &[String]) {
122  for rule in transforms {
123    apply_transform(entry, rule);
124  }
125}
126
127fn apply_whitelist(entry: &mut Entry, whitelist: &[String], words: &[&str]) {
128  for whitelist_entry in whitelist {
129    if entry.tags().has(whitelist_entry) {
130      continue;
131    }
132
133    let target = whitelist_entry.to_lowercase();
134    if words.iter().any(|w| *w == target) {
135      let tag_name = if whitelist_entry.chars().any(|c| c.is_uppercase()) {
136        whitelist_entry.clone()
137      } else {
138        target
139      };
140      entry.tags_mut().add(Tag::new(tag_name, None::<String>));
141    }
142  }
143}
144
/// Split a trailing `/r` flag off a transform replacement.
///
/// Returns the replacement without the flag, plus `true` when the flag was present. The flag is
/// only recognised at the very end of the string; `/r` elsewhere is left untouched.
fn parse_transform_flags(replacement: &str) -> (String, bool) {
  match replacement.strip_suffix("/r") {
    Some(stripped) => (stripped.to_string(), true),
    None => (replacement.to_string(), false),
  }
}
153
/// Split a transform rule into `(pattern, replacement)`.
///
/// When the rule contains `::`, the first `::` is the delimiter (so the pattern may contain single
/// colons); otherwise the first `:` is. Returns `None` when there is no delimiter or when either
/// side of it is empty.
fn parse_transform_rule(rule: &str) -> Option<(&str, &str)> {
  let delimiter = if rule.contains("::") { "::" } else { ":" };
  rule
    .split_once(delimiter)
    .filter(|(lhs, rhs)| !lhs.is_empty() && !rhs.is_empty())
}
169
170/// Convert a wildcard pattern to a case-insensitive regex matching a whole word.
171///
172/// `*` matches zero or more non-whitespace characters, `?` matches exactly one.
173fn cached_synonym_regex(pattern: &str) -> Option<Regex> {
174  let mut guard = SYNONYM_REGEX_CACHE.lock().unwrap_or_else(|e| e.into_inner());
175  let cache = guard.get_or_insert_with(HashMap::new);
176  if let Some(rx) = cache.get(pattern) {
177    return Some(rx.clone());
178  }
179  let rx_str = wildcard_to_word_regex(pattern);
180  let rx = Regex::new(&rx_str).ok()?;
181  cache.insert(pattern.to_string(), rx.clone());
182  Some(rx)
183}
184
185fn wildcard_to_word_regex(pattern: &str) -> String {
186  let mut rx = String::from("(?i)^");
187  let mut buf = [0u8; 4];
188  for ch in pattern.chars() {
189    match ch {
190      '*' => rx.push_str(r"\S*"),
191      '?' => rx.push_str(r"\S"),
192      _ => rx.push_str(&regex::escape(ch.encode_utf8(&mut buf))),
193    }
194  }
195  rx.push('$');
196  rx
197}
198
199#[cfg(test)]
200mod test {
201  use chrono::{Local, TimeZone};
202  use doing_taskpaper::{Note, Tags};
203
204  use super::*;
205
206  fn default_config() -> AutotagConfig {
207    AutotagConfig {
208      mappings: HashMap::new(),
209      synonyms: HashMap::new(),
210      transform: Vec::new(),
211      whitelist: Vec::new(),
212    }
213  }
214
215  fn sample_entry(title: &str, tags: Tags) -> Entry {
216    let date = Local.with_ymd_and_hms(2024, 3, 17, 14, 30, 0).unwrap();
217    Entry::new(date, title, tags, Note::new(), "Currently", None::<String>)
218  }
219
220  fn title_words(entry: &Entry) -> Vec<String> {
221    entry
222      .title()
223      .to_lowercase()
224      .split_whitespace()
225      .map(String::from)
226      .collect()
227  }
228
229  mod apply_default_tags {
230    use pretty_assertions::assert_eq;
231
232    use super::*;
233
234    #[test]
235    fn it_adds_default_tags() {
236      let mut entry = sample_entry("Working on project", Tags::new());
237      let defaults = vec!["work".to_string(), "tracked".to_string()];
238
239      apply_default_tags(&mut entry, &defaults);
240
241      assert!(entry.tags().has("work"));
242      assert!(entry.tags().has("tracked"));
243      assert_eq!(entry.tags().len(), 2);
244    }
245
246    #[test]
247    fn it_does_not_duplicate_existing_tags() {
248      let mut entry = sample_entry(
249        "Working on project",
250        Tags::from_iter(vec![Tag::new("work", None::<String>)]),
251      );
252      let defaults = vec!["work".to_string()];
253
254      apply_default_tags(&mut entry, &defaults);
255
256      assert_eq!(entry.tags().len(), 1);
257    }
258  }
259
260  mod apply_synonyms {
261    use pretty_assertions::assert_eq;
262
263    use super::*;
264
265    #[test]
266    fn it_adds_parent_tag_when_synonym_matches() {
267      let mut entry = sample_entry("Working on typography", Tags::new());
268      let mut synonyms = HashMap::new();
269      synonyms.insert("design".to_string(), vec!["typography".to_string()]);
270
271      let words = title_words(&entry);
272      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
273      apply_synonyms(&mut entry, &synonyms, &word_refs);
274
275      assert!(entry.tags().has("design"));
276      assert_eq!(entry.tags().len(), 1);
277    }
278
279    #[test]
280    fn it_matches_case_insensitively() {
281      let mut entry = sample_entry("Working on Typography", Tags::new());
282      let mut synonyms = HashMap::new();
283      synonyms.insert("design".to_string(), vec!["typography".to_string()]);
284
285      let words = title_words(&entry);
286      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
287      apply_synonyms(&mut entry, &synonyms, &word_refs);
288
289      assert!(entry.tags().has("design"));
290    }
291
292    #[test]
293    fn it_skips_when_parent_tag_already_exists() {
294      let mut entry = sample_entry(
295        "Working on typography",
296        Tags::from_iter(vec![Tag::new("design", None::<String>)]),
297      );
298      let mut synonyms = HashMap::new();
299      synonyms.insert("design".to_string(), vec!["typography".to_string()]);
300
301      let words = title_words(&entry);
302      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
303      apply_synonyms(&mut entry, &synonyms, &word_refs);
304
305      assert_eq!(entry.tags().len(), 1);
306    }
307
308    #[test]
309    fn it_supports_wildcard_patterns() {
310      let mut entry = sample_entry("Working on typographic layout", Tags::new());
311      let mut synonyms = HashMap::new();
312      synonyms.insert("design".to_string(), vec!["typo*".to_string()]);
313
314      let words = title_words(&entry);
315      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
316      apply_synonyms(&mut entry, &synonyms, &word_refs);
317
318      assert!(entry.tags().has("design"));
319    }
320  }
321
322  mod apply_transform {
323    use pretty_assertions::assert_eq;
324
325    use super::*;
326
327    #[test]
328    fn it_generates_multiple_tags() {
329      let mut entry = sample_entry(
330        "Working on project",
331        Tags::from_iter(vec![Tag::new("frontend", None::<String>)]),
332      );
333
334      apply_transform(&mut entry, "@frontend:web ui");
335
336      assert!(entry.tags().has("frontend"));
337      assert!(entry.tags().has("web"));
338      assert!(entry.tags().has("ui"));
339      assert_eq!(entry.tags().len(), 3);
340    }
341
342    #[test]
343    fn it_generates_tag_from_capture_group() {
344      let mut entry = sample_entry(
345        "Working on project",
346        Tags::from_iter(vec![Tag::new("project-123", None::<String>)]),
347      );
348
349      apply_transform(&mut entry, "(\\w+)-\\d+:$1");
350
351      assert!(entry.tags().has("project"));
352      assert!(entry.tags().has("project-123"));
353      assert_eq!(entry.tags().len(), 2);
354    }
355
356    #[test]
357    fn it_replaces_original_with_r_flag() {
358      let mut entry = sample_entry(
359        "Working on project",
360        Tags::from_iter(vec![Tag::new("oldtag", None::<String>)]),
361      );
362
363      apply_transform(&mut entry, "@oldtag:newtag/r");
364
365      assert!(entry.tags().has("newtag"));
366      assert!(!entry.tags().has("oldtag"));
367      assert_eq!(entry.tags().len(), 1);
368    }
369
370    #[test]
371    fn it_skips_invalid_rules() {
372      let mut entry = sample_entry(
373        "Working on project",
374        Tags::from_iter(vec![Tag::new("coding", None::<String>)]),
375      );
376
377      apply_transform(&mut entry, "norule");
378
379      assert_eq!(entry.tags().len(), 1);
380    }
381
382    #[test]
383    fn it_uses_double_colon_delimiter() {
384      let mut entry = sample_entry(
385        "Working on project",
386        Tags::from_iter(vec![Tag::new("time:morning", None::<String>)]),
387      );
388
389      apply_transform(&mut entry, "@time:morning::daytime");
390
391      assert!(entry.tags().has("time:morning"));
392      assert!(entry.tags().has("daytime"));
393    }
394  }
395
396  mod apply_whitelist {
397    use pretty_assertions::assert_eq;
398
399    use super::*;
400
401    #[test]
402    fn it_adds_tag_for_matching_word() {
403      let mut entry = sample_entry("Working on design", Tags::new());
404      let whitelist = vec!["design".to_string()];
405
406      let words = title_words(&entry);
407      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
408      apply_whitelist(&mut entry, &whitelist, &word_refs);
409
410      assert!(entry.tags().has("design"));
411      assert_eq!(entry.tags().len(), 1);
412    }
413
414    #[test]
415    fn it_does_not_match_partial_words() {
416      let mut entry = sample_entry("Working on redesign", Tags::new());
417      let whitelist = vec!["design".to_string()];
418
419      let words = title_words(&entry);
420      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
421      apply_whitelist(&mut entry, &whitelist, &word_refs);
422
423      assert!(entry.tags().is_empty());
424    }
425
426    #[test]
427    fn it_matches_case_insensitively() {
428      let mut entry = sample_entry("Working on Design", Tags::new());
429      let whitelist = vec!["design".to_string()];
430
431      let words = title_words(&entry);
432      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
433      apply_whitelist(&mut entry, &whitelist, &word_refs);
434
435      assert!(entry.tags().has("design"));
436    }
437
438    #[test]
439    fn it_preserves_case_from_whitelist_entry() {
440      let mut entry = sample_entry("Working on openai stuff", Tags::new());
441      let whitelist = vec!["OpenAI".to_string()];
442
443      let words = title_words(&entry);
444      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
445      apply_whitelist(&mut entry, &whitelist, &word_refs);
446
447      assert!(entry.tags().has("OpenAI"));
448    }
449
450    #[test]
451    fn it_skips_when_tag_already_exists() {
452      let mut entry = sample_entry(
453        "Working on design",
454        Tags::from_iter(vec![Tag::new("design", None::<String>)]),
455      );
456      let whitelist = vec!["design".to_string()];
457
458      let words = title_words(&entry);
459      let word_refs: Vec<&str> = words.iter().map(|s| s.as_str()).collect();
460      apply_whitelist(&mut entry, &whitelist, &word_refs);
461
462      assert_eq!(entry.tags().len(), 1);
463    }
464  }
465
466  mod autotag {
467    use pretty_assertions::assert_eq;
468
469    use super::*;
470
471    #[test]
472    fn it_applies_all_rules() {
473      let mut entry = sample_entry("Working on design with typography", Tags::new());
474      let mut config = default_config();
475      config.whitelist = vec!["design".to_string()];
476      config
477        .synonyms
478        .insert("layout".to_string(), vec!["typography".to_string()]);
479      let defaults = vec!["tracked".to_string()];
480
481      autotag(&mut entry, &config, &defaults);
482
483      assert!(entry.tags().has("tracked"));
484      assert!(entry.tags().has("design"));
485      assert!(entry.tags().has("layout"));
486      assert_eq!(entry.tags().len(), 3);
487    }
488
489    #[test]
490    fn it_is_idempotent() {
491      let mut entry = sample_entry("Working on design", Tags::new());
492      let mut config = default_config();
493      config.whitelist = vec!["design".to_string()];
494      let defaults = vec!["tracked".to_string()];
495
496      autotag(&mut entry, &config, &defaults);
497      let tags_after_first = entry.tags().len();
498
499      autotag(&mut entry, &config, &defaults);
500
501      assert_eq!(entry.tags().len(), tags_after_first);
502    }
503  }
504
505  mod parse_transform_flags {
506    use pretty_assertions::assert_eq;
507
508    use super::*;
509
510    #[test]
511    fn it_detects_replace_flag() {
512      let (replacement, replace) = parse_transform_flags("newtag/r");
513
514      assert_eq!(replacement, "newtag");
515      assert!(replace);
516    }
517
518    #[test]
519    fn it_returns_false_without_flag() {
520      let (replacement, replace) = parse_transform_flags("newtag");
521
522      assert_eq!(replacement, "newtag");
523      assert!(!replace);
524    }
525  }
526
527  mod parse_transform_rule {
528    use pretty_assertions::assert_eq;
529
530    use super::*;
531
532    #[test]
533    fn it_returns_none_for_empty_parts() {
534      assert!(parse_transform_rule(":replacement").is_none());
535      assert!(parse_transform_rule("pattern:").is_none());
536    }
537
538    #[test]
539    fn it_returns_none_for_no_delimiter() {
540      assert!(parse_transform_rule("norule").is_none());
541    }
542
543    #[test]
544    fn it_splits_on_double_colon() {
545      let result = parse_transform_rule("pat:tern::replacement");
546
547      assert_eq!(result, Some(("pat:tern", "replacement")));
548    }
549
550    #[test]
551    fn it_splits_on_single_colon() {
552      let result = parse_transform_rule("pattern:replacement");
553
554      assert_eq!(result, Some(("pattern", "replacement")));
555    }
556  }
557
558  mod wildcard_to_word_regex {
559    use pretty_assertions::assert_eq;
560
561    use super::*;
562
563    #[test]
564    fn it_converts_question_wildcard() {
565      let rx = Regex::new(&wildcard_to_word_regex("d?sign")).unwrap();
566
567      assert!(rx.is_match("design"));
568      assert!(!rx.is_match("dsign"));
569
570      let result = wildcard_to_word_regex("d?sign");
571
572      assert_eq!(result, r"(?i)^d\Ssign$");
573    }
574
575    #[test]
576    fn it_converts_star_wildcard() {
577      let rx = Regex::new(&wildcard_to_word_regex("typo*")).unwrap();
578
579      assert!(rx.is_match("typography"));
580      assert!(rx.is_match("typographic"));
581      assert!(rx.is_match("typo"));
582      assert!(!rx.is_match("mytypo"));
583    }
584
585    #[test]
586    fn it_matches_exact_word() {
587      let rx = Regex::new(&wildcard_to_word_regex("design")).unwrap();
588
589      assert!(rx.is_match("design"));
590      assert!(rx.is_match("Design"));
591      assert!(!rx.is_match("redesign"));
592    }
593  }
594}