Skip to main content

dmc_transform/builtin/
emoji.rs

//! Emoji shortcodes -> unicode. See `transformers/emoji.md` for full docs.
2
3use crate::pipeline::Transformer;
4use crate::visit::{NodeAction, Visitor, walk_root};
5use dmc_diagnostic::Code;
6use dmc_diagnostic::metadata::SourceMeta;
7use dmc_parser::ast::*;
8use duck_diagnostic::DiagnosticEngine;
9
/// Transformer that replaces `:shortcode:` tokens in text nodes with the
/// corresponding emoji. Stateless, so the unit value is the only instance.
#[derive(Default, Debug)]
pub struct Emoji;
12
13impl Transformer for Emoji {
14  fn name(&self) -> &str {
15    "emoji"
16  }
17  fn transform(&self, doc: &mut Document, _meta: &SourceMeta, _engine: &mut DiagnosticEngine<Code>) {
18    let mut v = Apply;
19    walk_root(&mut doc.children, &mut v);
20  }
21}
22
/// Visitor that performs the actual shortcode expansion on each text node.
struct Apply;
24
25impl Visitor for Apply {
26  fn visit_node(&mut self, node: &mut Node) -> NodeAction {
27    let Node::Text(t) = node else { return NodeAction::Keep };
28    let Some(replaced) = expand_emoji(&t.value) else { return NodeAction::Keep };
29    t.value = replaced;
30    NodeAction::Keep
31  }
32}
33
34/// Walk `text` and replace every recognised `:shortcode:` with its emoji.
35/// Returns `None` when no shortcode matched, so callers can skip the
36/// allocation.
37fn expand_emoji(text: &str) -> Option<String> {
38  if !text.contains(':') {
39    return None;
40  }
41  let bytes = text.as_bytes();
42  let mut out = String::with_capacity(text.len());
43  let mut i = 0;
44  let mut found_any = false;
45  while i < bytes.len() {
46    if bytes[i] != b':' {
47      let ch_len = utf8_char_len(bytes[i]);
48      out.push_str(&text[i..i + ch_len]);
49      i += ch_len;
50      continue;
51    }
52    // Look ahead for the closing colon. Shortcodes are short ASCII tokens
53    // (`[a-z0-9_+-]+`); cap the search so a colon-pair miles apart never
54    // becomes a fake match.
55    let max_end = (i + 1 + 64).min(bytes.len());
56    let close = (i + 1..max_end).find(|&j| bytes[j] == b':');
57    let Some(close) = close else {
58      out.push(':');
59      i += 1;
60      continue;
61    };
62    let shortcode = &text[i + 1..close];
63    if !is_shortcode(shortcode) {
64      out.push(':');
65      i += 1;
66      continue;
67    }
68    if let Some(emo) = emojis::get_by_shortcode(shortcode) {
69      out.push_str(emo.as_str());
70      i = close + 1;
71      found_any = true;
72    } else {
73      out.push(':');
74      i += 1;
75    }
76  }
77  if found_any { Some(out) } else { None }
78}
79
/// Returns `true` when `s` is a plausible shortcode body: non-empty and made
/// up solely of ASCII letters (either case), ASCII digits, `_`, `+` or `-`.
///
/// This is only a syntactic pre-filter; whether the token names a real emoji
/// is decided by the lookup that follows.
fn is_shortcode(s: &str) -> bool {
  !s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'+' | b'-'))
}
83
/// Byte length of the UTF-8 sequence that starts with lead byte `b`.
///
/// Intended to be called with the first byte of a character. Bytes that are
/// not valid lead bytes still get an answer from the range they fall in
/// (continuation bytes `0x80..=0xBF` report 2, `0xF8..=0xFF` report 4), so
/// callers must only pass bytes taken at a char boundary.
fn utf8_char_len(b: u8) -> usize {
  match b {
    0x00..=0x7F => 1,
    0x80..=0xDF => 2,
    0xE0..=0xEF => 3,
    0xF0..=0xFF => 4,
  }
}
95
#[cfg(test)]
mod tests {
  use super::*;

  #[test]
  fn passthrough_when_no_colon() {
    assert!(expand_emoji("hello world").is_none());
  }

  #[test]
  fn known_shortcode_expands() {
    let out = expand_emoji("hi :smile: there").unwrap();
    assert!(out.contains("hi "));
    assert!(out.contains(" there"));
    assert!(!out.contains(":smile:"), "shortcode survived: {out}");
  }

  #[test]
  fn unknown_shortcode_is_kept() {
    assert!(expand_emoji("see :nonexistent_emoji_token: here").is_none());
  }

  #[test]
  fn ratio_text_passes_through() {
    // A lone colon has no closing partner, so nothing may be rewritten.
    assert!(expand_emoji("ratio 1:2").is_none());
  }

  #[test]
  fn multiple_shortcodes() {
    let out = expand_emoji(":heart: and :star:").unwrap();
    assert!(!out.contains(':'), "leftover colon: {out}");
  }

  #[test]
  fn non_ascii_text_is_preserved() {
    // Multi-byte characters around a match must be copied through intact.
    let out = expand_emoji("h\u{e9}llo :smile: w\u{f6}rld").unwrap();
    assert!(out.starts_with("h\u{e9}llo "), "prefix mangled: {out}");
    assert!(out.ends_with(" w\u{f6}rld"), "suffix mangled: {out}");
  }
}