Skip to main content

dmc_transform/builtin/
emoji.rs

1//! Replace `:shortcode:` patterns in text with the matching Unicode emoji.
2//! Mirrors `remark-emoji` in the JS chain. Unknown shortcodes are left
3//! untouched so doc text containing colons (`:foo:bar:`) survives intact.
4//!
5//! Only `Text` nodes are visited - code blocks, inline code, JSX, and
6//! attribute values are left alone, matching the JS plugin's scope.
7
8use crate::pipeline::Transformer;
9use crate::visit::{NodeAction, Visitor, walk_root};
10use dmc_diagnostic::Code;
11use dmc_diagnostic::metadata::SourceMeta;
12use dmc_parser::ast::*;
13use duck_diagnostic::DiagnosticEngine;
14
/// Transformer that swaps `:shortcode:` tokens in text nodes for the
/// corresponding Unicode emoji. Carries no state.
#[derive(Debug, Default)]
pub struct Emoji;
17
18impl Transformer for Emoji {
19  fn name(&self) -> &str {
20    "emoji"
21  }
22  fn transform(&self, doc: &mut Document, _meta: &SourceMeta, _engine: &mut DiagnosticEngine<Code>) {
23    let mut v = Apply;
24    walk_root(&mut doc.children, &mut v);
25  }
26}
27
28struct Apply;
29
30impl Visitor for Apply {
31  fn visit_node(&mut self, node: &mut Node) -> NodeAction {
32    let Node::Text(t) = node else { return NodeAction::Keep };
33    let Some(replaced) = expand_emoji(&t.value) else { return NodeAction::Keep };
34    t.value = replaced;
35    NodeAction::Keep
36  }
37}
38
39/// Walk `text` and replace every recognised `:shortcode:` with its emoji.
40/// Returns `None` when no shortcode matched, so callers can skip the
41/// allocation.
42fn expand_emoji(text: &str) -> Option<String> {
43  if !text.contains(':') {
44    return None;
45  }
46  let bytes = text.as_bytes();
47  let mut out = String::with_capacity(text.len());
48  let mut i = 0;
49  let mut found_any = false;
50  while i < bytes.len() {
51    if bytes[i] != b':' {
52      let ch_len = utf8_char_len(bytes[i]);
53      out.push_str(&text[i..i + ch_len]);
54      i += ch_len;
55      continue;
56    }
57    // Look ahead for the closing colon. Shortcodes are short ASCII tokens
58    // (`[a-z0-9_+-]+`); cap the search so a colon-pair miles apart never
59    // becomes a fake match.
60    let max_end = (i + 1 + 64).min(bytes.len());
61    let close = (i + 1..max_end).find(|&j| bytes[j] == b':');
62    let Some(close) = close else {
63      out.push(':');
64      i += 1;
65      continue;
66    };
67    let shortcode = &text[i + 1..close];
68    if !is_shortcode(shortcode) {
69      out.push(':');
70      i += 1;
71      continue;
72    }
73    if let Some(emo) = emojis::get_by_shortcode(shortcode) {
74      out.push_str(emo.as_str());
75      i = close + 1;
76      found_any = true;
77    } else {
78      out.push(':');
79      i += 1;
80    }
81  }
82  if found_any { Some(out) } else { None }
83}
84
/// Reports whether `s` is a plausible shortcode name: non-empty and made up
/// solely of ASCII letters, ASCII digits, `_`, `+` or `-`.
fn is_shortcode(s: &str) -> bool {
  if s.is_empty() {
    return false;
  }
  s.bytes().all(|byte| match byte {
    b'_' | b'+' | b'-' => true,
    other => other.is_ascii_alphanumeric(),
  })
}
88
/// Maps a byte to the length, in bytes, of the UTF-8 sequence it is assumed
/// to start.
///
/// The classification is purely by value range: bytes below `0x80` give 1,
/// below `0xE0` give 2, below `0xF0` give 3, and everything else gives 4.
/// No validation is performed, so a continuation byte (`0x80..=0xBF`) also
/// yields 2; callers are expected to pass the first byte of a character.
fn utf8_char_len(b: u8) -> usize {
  match b {
    0x00..=0x7F => 1,
    0x80..=0xDF => 2,
    0xE0..=0xEF => 3,
    0xF0..=0xFF => 4,
  }
}
100
#[cfg(test)]
mod tests {
  use super::*;

  /// Text without any colon is rejected before any scanning happens.
  #[test]
  fn passthrough_when_no_colon() {
    assert!(expand_emoji("hello world").is_none());
  }

  /// A known shortcode is replaced and the surrounding text keeps its place.
  #[test]
  fn known_shortcode_expands() {
    let out = expand_emoji("hi :smile: there").unwrap();
    assert!(out.starts_with("hi "), "prefix damaged: {out}");
    assert!(out.ends_with(" there"), "suffix damaged: {out}");
    assert!(!out.contains(":smile:"), "shortcode survived: {out}");
  }

  /// A well-formed but unknown shortcode leaves the text unchanged.
  #[test]
  fn unknown_shortcode_is_kept() {
    assert!(expand_emoji("see :nonexistent_emoji_token: here").is_none());
  }

  #[test]
  fn ratio_text_passes_through() {
    // A lone colon has no closing partner, so there is nothing to expand.
    assert!(expand_emoji("ratio 1:2").is_none());
  }

  #[test]
  fn multiple_shortcodes() {
    let out = expand_emoji(":heart: and :star:").unwrap();
    assert!(!out.contains(':'), "leftover colon: {out}");
  }

  /// Multi-byte characters around a shortcode must come through untouched.
  #[test]
  fn non_ascii_text_is_preserved() {
    let out = expand_emoji("h\u{e9}llo :heart: w\u{f6}rld \u{2192} ok").unwrap();
    assert!(out.starts_with("h\u{e9}llo "), "prefix damaged: {out}");
    assert!(out.ends_with(" w\u{f6}rld \u{2192} ok"), "suffix damaged: {out}");
    assert!(!out.contains(":heart:"), "shortcode survived: {out}");
  }

  /// Empty (`::`) and unterminated (`:smile`) candidates are not shortcodes.
  #[test]
  fn empty_and_unterminated_shortcodes_are_kept() {
    assert!(expand_emoji("a :: b").is_none());
    assert!(expand_emoji("note: :smile").is_none());
  }

  /// A stray colon directly before a shortcode stays; the shortcode expands.
  #[test]
  fn stray_colon_before_shortcode() {
    let out = expand_emoji("a::heart:").unwrap();
    assert!(out.starts_with("a:"), "stray colon lost: {out}");
    assert!(!out.contains("heart"), "shortcode survived: {out}");
  }
}