Skip to main content

chat_system/
rich_text.rs

1//! Rich text representation and format conversion.
2
3use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
4
/// A node in a rich text tree.
///
/// Container variants hold their children in document order. Nodes compare
/// structurally, so two trees are equal when they have the same variants with
/// equal contents in the same order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RichTextNode {
    /// Unformatted text.
    Plain(String),
    /// Children rendered with bold emphasis.
    Bold(Vec<RichTextNode>),
    /// Children rendered with italic emphasis.
    Italic(Vec<RichTextNode>),
    /// Children rendered struck through.
    Strikethrough(Vec<RichTextNode>),
    /// Inline code span.
    Code(String),
    /// Multi-line code block.
    CodeBlock {
        /// Language tag of the block, if one was given.
        language: Option<String>,
        /// The code itself.
        code: String,
    },
    /// Hyperlink.
    Link {
        /// Link target.
        url: String,
        /// Nodes shown as the link's label.
        text: Vec<RichTextNode>,
    },
    /// Reference to a user; every renderer in this module emits it as `@name`.
    Mention {
        /// Identifier of the mentioned entity (not used by this module's renderers).
        id: String,
        /// Display name placed after the `@`.
        name: String,
    },
    /// Emoji text, emitted as-is (HTML-escaped in the Matrix renderer).
    Emoji(String),
    /// Paragraph container; this module's renderers emit the children back to back.
    Paragraph(Vec<RichTextNode>),
    /// List item container; this module's renderers emit the children back to back.
    ListItem(Vec<RichTextNode>),
}
29
30/// A rich text document as a sequence of nodes.
31pub struct RichText(pub Vec<RichTextNode>);
32
33impl RichTextNode {
34    fn to_plain_text(&self) -> String {
35        match self {
36            RichTextNode::Plain(s) => s.clone(),
37            RichTextNode::Bold(children)
38            | RichTextNode::Italic(children)
39            | RichTextNode::Strikethrough(children)
40            | RichTextNode::Paragraph(children)
41            | RichTextNode::ListItem(children) => {
42                children.iter().map(|n| n.to_plain_text()).collect()
43            }
44            RichTextNode::Code(s) => s.clone(),
45            RichTextNode::CodeBlock { code, .. } => code.clone(),
46            RichTextNode::Link { text, .. } => text.iter().map(|n| n.to_plain_text()).collect(),
47            RichTextNode::Mention { name, .. } => format!("@{}", name),
48            RichTextNode::Emoji(e) => e.clone(),
49        }
50    }
51
52    fn to_markdown(&self) -> String {
53        match self {
54            RichTextNode::Plain(s) => s.clone(),
55            RichTextNode::Bold(children) => {
56                format!(
57                    "**{}**",
58                    children.iter().map(|n| n.to_markdown()).collect::<String>()
59                )
60            }
61            RichTextNode::Italic(children) => {
62                format!(
63                    "*{}*",
64                    children.iter().map(|n| n.to_markdown()).collect::<String>()
65                )
66            }
67            RichTextNode::Strikethrough(children) => {
68                format!(
69                    "~~{}~~",
70                    children.iter().map(|n| n.to_markdown()).collect::<String>()
71                )
72            }
73            RichTextNode::Code(s) => format!("`{}`", s),
74            RichTextNode::CodeBlock { language, code } => {
75                if let Some(lang) = language {
76                    format!("```{}\n{}\n```", lang, code)
77                } else {
78                    format!("```\n{}\n```", code)
79                }
80            }
81            RichTextNode::Link { url, text } => {
82                format!(
83                    "[{}]({})",
84                    text.iter().map(|n| n.to_markdown()).collect::<String>(),
85                    url
86                )
87            }
88            RichTextNode::Mention { name, .. } => format!("@{}", name),
89            RichTextNode::Emoji(e) => e.clone(),
90            RichTextNode::Paragraph(children) | RichTextNode::ListItem(children) => {
91                children.iter().map(|n| n.to_markdown()).collect()
92            }
93        }
94    }
95
96    fn to_matrix_html(&self) -> String {
97        match self {
98            RichTextNode::Plain(s) => html_escape(s),
99            RichTextNode::Bold(children) => {
100                format!(
101                    "<b>{}</b>",
102                    children
103                        .iter()
104                        .map(|n| n.to_matrix_html())
105                        .collect::<String>()
106                )
107            }
108            RichTextNode::Italic(children) => {
109                format!(
110                    "<i>{}</i>",
111                    children
112                        .iter()
113                        .map(|n| n.to_matrix_html())
114                        .collect::<String>()
115                )
116            }
117            RichTextNode::Strikethrough(children) => {
118                format!(
119                    "<del>{}</del>",
120                    children
121                        .iter()
122                        .map(|n| n.to_matrix_html())
123                        .collect::<String>()
124                )
125            }
126            RichTextNode::Code(s) => format!("<code>{}</code>", html_escape(s)),
127            RichTextNode::CodeBlock { language, code } => {
128                if let Some(lang) = language {
129                    format!(
130                        "<pre><code class=\"language-{}\">{}</code></pre>",
131                        lang,
132                        html_escape(code)
133                    )
134                } else {
135                    format!("<pre>{}</pre>", html_escape(code))
136                }
137            }
138            RichTextNode::Link { url, text } => {
139                format!(
140                    "<a href=\"{}\">{}</a>",
141                    html_escape(url),
142                    text.iter().map(|n| n.to_matrix_html()).collect::<String>()
143                )
144            }
145            RichTextNode::Mention { name, .. } => format!("@{}", html_escape(name)),
146            RichTextNode::Emoji(e) => html_escape(e),
147            RichTextNode::Paragraph(children) | RichTextNode::ListItem(children) => {
148                children.iter().map(|n| n.to_matrix_html()).collect()
149            }
150        }
151    }
152
153    fn to_irc_formatted(&self) -> String {
154        match self {
155            RichTextNode::Plain(s) => s.clone(),
156            RichTextNode::Bold(children) => {
157                format!(
158                    "\x02{}\x02",
159                    children
160                        .iter()
161                        .map(|n| n.to_irc_formatted())
162                        .collect::<String>()
163                )
164            }
165            RichTextNode::Italic(children) => {
166                format!(
167                    "\x1D{}\x1D",
168                    children
169                        .iter()
170                        .map(|n| n.to_irc_formatted())
171                        .collect::<String>()
172                )
173            }
174            RichTextNode::Strikethrough(children) => {
175                children.iter().map(|n| n.to_irc_formatted()).collect()
176            }
177            RichTextNode::Code(s) => format!("`{}`", s),
178            RichTextNode::CodeBlock { code, .. } => code.clone(),
179            RichTextNode::Link { url, text } => {
180                format!(
181                    "{} ({})",
182                    text.iter()
183                        .map(|n| n.to_irc_formatted())
184                        .collect::<String>(),
185                    url
186                )
187            }
188            RichTextNode::Mention { name, .. } => format!("@{}", name),
189            RichTextNode::Emoji(e) => e.clone(),
190            RichTextNode::Paragraph(children) | RichTextNode::ListItem(children) => {
191                children.iter().map(|n| n.to_irc_formatted()).collect()
192            }
193        }
194    }
195
196    fn to_whatsapp_formatted(&self) -> String {
197        match self {
198            RichTextNode::Plain(s) => s.clone(),
199            RichTextNode::Bold(children) => {
200                format!(
201                    "*{}*",
202                    children
203                        .iter()
204                        .map(|n| n.to_whatsapp_formatted())
205                        .collect::<String>()
206                )
207            }
208            RichTextNode::Italic(children) => {
209                format!(
210                    "_{}_",
211                    children
212                        .iter()
213                        .map(|n| n.to_whatsapp_formatted())
214                        .collect::<String>()
215                )
216            }
217            RichTextNode::Strikethrough(children) => {
218                format!(
219                    "~{}~",
220                    children
221                        .iter()
222                        .map(|n| n.to_whatsapp_formatted())
223                        .collect::<String>()
224                )
225            }
226            RichTextNode::Code(s) => format!("`{}`", s),
227            RichTextNode::CodeBlock { code, .. } => format!("```{}```", code),
228            RichTextNode::Link { url, text } => {
229                format!(
230                    "{} ({})",
231                    text.iter()
232                        .map(|n| n.to_whatsapp_formatted())
233                        .collect::<String>(),
234                    url
235                )
236            }
237            RichTextNode::Mention { name, .. } => format!("@{}", name),
238            RichTextNode::Emoji(e) => e.clone(),
239            RichTextNode::Paragraph(children) | RichTextNode::ListItem(children) => {
240                children.iter().map(|n| n.to_whatsapp_formatted()).collect()
241            }
242        }
243    }
244}
245
/// Returns `s` with the HTML-significant characters `&`, `<`, `>` and `"`
/// replaced by their entity references; every other character is copied as-is.
fn html_escape(s: impl AsRef<str>) -> String {
    let raw = s.as_ref();
    raw.chars()
        .fold(String::with_capacity(raw.len()), |mut escaped, c| {
            match c {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
            escaped
        })
}
260
261impl RichText {
262    /// Convert to plain text (strips all formatting).
263    pub fn to_plain_text(&self) -> String {
264        self.0.iter().map(|n| n.to_plain_text()).collect()
265    }
266
267    /// Convert to standard CommonMark markdown.
268    pub fn to_markdown(&self) -> String {
269        self.0.iter().map(|n| n.to_markdown()).collect()
270    }
271
272    /// Convert to Discord markdown (same as CommonMark).
273    pub fn to_discord_markdown(&self) -> String {
274        self.to_markdown()
275    }
276
277    /// Convert to Telegram HTML.
278    pub fn to_telegram_html(&self) -> String {
279        crate::markdown::markdown_to_telegram_html(self.to_markdown())
280    }
281
282    /// Convert to Slack mrkdwn.
283    pub fn to_slack_mrkdwn(&self) -> String {
284        crate::markdown::markdown_to_slack(self.to_markdown())
285    }
286
287    /// Convert to Matrix HTML.
288    pub fn to_matrix_html(&self) -> String {
289        self.0.iter().map(|n| n.to_matrix_html()).collect()
290    }
291
292    /// Convert to IRC formatted text (bold=`\x02`, italic=`\x1D`).
293    pub fn to_irc_formatted(&self) -> String {
294        self.0.iter().map(|n| n.to_irc_formatted()).collect()
295    }
296
297    /// Convert to WhatsApp formatted text.
298    pub fn to_whatsapp_formatted(&self) -> String {
299        self.0.iter().map(|n| n.to_whatsapp_formatted()).collect()
300    }
301
302    /// Create from plain text.
303    pub fn from_plain(text: impl AsRef<str>) -> Self {
304        Self(vec![RichTextNode::Plain(text.as_ref().to_string())])
305    }
306
307    /// Parse from Markdown using pulldown-cmark.
308    pub fn from_markdown(text: impl AsRef<str>) -> Self {
309        let text = text.as_ref();
310        let mut opts = Options::empty();
311        opts.insert(Options::ENABLE_STRIKETHROUGH);
312        let parser = Parser::new_ext(text, opts);
313
314        let mut stack: Vec<Vec<RichTextNode>> = vec![vec![]];
315
316        for event in parser {
317            match event {
318                Event::Start(Tag::Strong)
319                | Event::Start(Tag::Emphasis)
320                | Event::Start(Tag::Strikethrough) => {
321                    stack.push(vec![]);
322                }
323                Event::Start(Tag::Link { dest_url, .. }) => {
324                    stack.push(vec![RichTextNode::Plain(dest_url.to_string())]);
325                    stack.push(vec![]);
326                }
327                Event::Start(Tag::CodeBlock(kind)) => {
328                    let lang = match kind {
329                        pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
330                            Some(lang.to_string())
331                        }
332                        _ => None,
333                    };
334                    stack.push(vec![RichTextNode::Plain(lang.unwrap_or_default())]);
335                    stack.push(vec![]);
336                }
337                Event::End(TagEnd::Strong) => {
338                    let children = stack.pop().unwrap_or_default();
339                    if let Some(top) = stack.last_mut() {
340                        top.push(RichTextNode::Bold(children));
341                    }
342                }
343                Event::End(TagEnd::Emphasis) => {
344                    let children = stack.pop().unwrap_or_default();
345                    if let Some(top) = stack.last_mut() {
346                        top.push(RichTextNode::Italic(children));
347                    }
348                }
349                Event::End(TagEnd::Strikethrough) => {
350                    let children = stack.pop().unwrap_or_default();
351                    if let Some(top) = stack.last_mut() {
352                        top.push(RichTextNode::Strikethrough(children));
353                    }
354                }
355                Event::End(TagEnd::Link) => {
356                    let link_text = stack.pop().unwrap_or_default();
357                    let url_node = stack.pop().unwrap_or_default();
358                    let url = if let Some(RichTextNode::Plain(u)) = url_node.into_iter().next() {
359                        u
360                    } else {
361                        String::new()
362                    };
363                    if let Some(top) = stack.last_mut() {
364                        top.push(RichTextNode::Link {
365                            url,
366                            text: link_text,
367                        });
368                    }
369                }
370                Event::End(TagEnd::CodeBlock) => {
371                    let code_nodes = stack.pop().unwrap_or_default();
372                    let lang_node = stack.pop().unwrap_or_default();
373                    let lang = if let Some(RichTextNode::Plain(l)) = lang_node.into_iter().next() {
374                        if l.is_empty() {
375                            None
376                        } else {
377                            Some(l)
378                        }
379                    } else {
380                        None
381                    };
382                    let code: String = code_nodes.iter().map(|n| n.to_plain_text()).collect();
383                    if let Some(top) = stack.last_mut() {
384                        top.push(RichTextNode::CodeBlock {
385                            language: lang,
386                            code,
387                        });
388                    }
389                }
390                Event::Code(text) => {
391                    if let Some(top) = stack.last_mut() {
392                        top.push(RichTextNode::Code(text.to_string()));
393                    }
394                }
395                Event::Text(text) => {
396                    if let Some(top) = stack.last_mut() {
397                        top.push(RichTextNode::Plain(text.to_string()));
398                    }
399                }
400                Event::SoftBreak | Event::HardBreak => {
401                    if let Some(top) = stack.last_mut() {
402                        top.push(RichTextNode::Plain("\n".into()));
403                    }
404                }
405                _ => {}
406            }
407        }
408
409        Self(stack.into_iter().next().unwrap_or_default())
410    }
411}
412
413impl std::fmt::Display for RichText {
414    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
415        f.write_str(&self.to_plain_text())
416    }
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Document consisting of a single bold node wrapping the text "hi".
    fn bold_hi() -> RichText {
        let inner = RichTextNode::Plain(String::from("hi"));
        RichText(vec![RichTextNode::Bold(vec![inner])])
    }

    #[test]
    fn plain_text_strips_formatting() {
        let emphasized = RichTextNode::Bold(vec![RichTextNode::Plain(String::from("hello"))]);
        let trailing = RichTextNode::Plain(String::from(" world"));
        let doc = RichText(vec![emphasized, trailing]);
        assert_eq!(doc.to_plain_text(), "hello world");
    }

    #[test]
    fn discord_bold_renders_stars() {
        let rendered = bold_hi().to_discord_markdown();
        assert!(rendered.contains("**hi**"));
    }

    #[test]
    fn matrix_bold_renders_b_tag() {
        let rendered = bold_hi().to_matrix_html();
        assert!(rendered.contains("<b>hi</b>"));
    }

    #[test]
    fn irc_bold_uses_control_char() {
        let rendered = bold_hi().to_irc_formatted();
        assert!(rendered.contains('\x02'));
    }

    #[test]
    fn whatsapp_bold_uses_stars() {
        let rendered = bold_hi().to_whatsapp_formatted();
        assert!(rendered.contains("*hi*"));
    }

    #[test]
    fn from_markdown_parses_bold() {
        let parsed = RichText::from_markdown("**bold text**");
        let has_bold = parsed
            .0
            .iter()
            .any(|node| matches!(node, RichTextNode::Bold(_)));
        assert!(has_bold);
    }

    #[test]
    fn display_gives_plain_text() {
        let doc = RichText(vec![RichTextNode::Plain(String::from("hello"))]);
        assert_eq!(doc.to_string(), "hello");
    }

    #[test]
    fn code_block_roundtrip() {
        let block = RichTextNode::CodeBlock {
            language: Some(String::from("rust")),
            code: String::from("let x = 1;"),
        };
        let markdown = RichText(vec![block]).to_markdown();
        assert!(markdown.contains("```rust"));
        assert!(markdown.contains("let x = 1;"));
    }
}