Skip to main content

chat_system/
markdown.rs

1//! Markdown-to-platform conversion utilities.
2//!
3//! Functions for converting standard Markdown to platform-specific formats.
4
/// Maximum size of a single Telegram message, in characters.
pub const TELEGRAM_MAX_LEN: usize = 4_096;
/// Maximum size of a single Slack message, in characters.
pub const SLACK_MAX_LEN: usize = 40_000;
9
/// Replace the HTML-significant characters `&`, `<` and `>` with their
/// entities (`&amp;`, `&lt;`, `&gt;`).
///
/// Every other character, including `"` and `'`, is copied through unchanged.
fn escape_html(s: impl AsRef<str>) -> String {
    let raw = s.as_ref();
    raw.chars()
        .fold(String::with_capacity(raw.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                other => escaped.push(other),
            }
            escaped
        })
}
23
24/// Convert Markdown to Telegram HTML.
25///
26/// Telegram supports `<b>`, `<i>`, `<s>`, `<code>`, `<pre>`, `<a href="">`.
27pub fn markdown_to_telegram_html(md: impl AsRef<str>) -> String {
28    let escaped = escape_html(md);
29    let mut out = String::with_capacity(escaped.len() + 64);
30    let mut i = 0;
31
32    while i < escaped.len() {
33        // Fenced code block: ```
34        if escaped.get(i..i + 3) == Some("```") {
35            i += 3;
36            let start = i;
37            let mut code_end = None;
38            let mut j = i;
39            while j <= escaped.len().saturating_sub(3) {
40                if escaped.get(j..j + 3) == Some("```") {
41                    code_end = Some(j);
42                    break;
43                }
44                // advance by one char
45                let ch_len = escaped[j..].chars().next().map_or(1, |c| c.len_utf8());
46                j += ch_len;
47            }
48            if let Some(end) = code_end {
49                let content = &escaped[start..end];
50                let (lang, code) = if let Some(nl) = content.find('\n') {
51                    let maybe_lang = content[..nl].trim();
52                    if !maybe_lang.is_empty() && !maybe_lang.contains(' ') {
53                        (maybe_lang, &content[nl + 1..])
54                    } else {
55                        ("", content)
56                    }
57                } else {
58                    ("", content)
59                };
60                if lang.is_empty() {
61                    out.push_str("<pre>");
62                    out.push_str(code);
63                    out.push_str("</pre>");
64                } else {
65                    out.push_str("<pre><code class=\"language-");
66                    out.push_str(lang);
67                    out.push_str("\">");
68                    out.push_str(code);
69                    out.push_str("</code></pre>");
70                }
71                i = end + 3;
72            } else {
73                out.push_str("```");
74            }
75            continue;
76        }
77
78        // Inline code: `
79        if escaped.get(i..i + 1) == Some("`") {
80            i += 1;
81            let start = i;
82            while i < escaped.len() && escaped.get(i..i + 1) != Some("`") {
83                let ch_len = escaped[i..].chars().next().map_or(1, |c| c.len_utf8());
84                i += ch_len;
85            }
86            out.push_str("<code>");
87            out.push_str(&escaped[start..i]);
88            out.push_str("</code>");
89            if i < escaped.len() {
90                i += 1; // skip closing `
91            }
92            continue;
93        }
94
95        // Bold: **
96        if escaped.get(i..i + 2) == Some("**") {
97            i += 2;
98            let start = i;
99            if let Some(rel) = escaped[i..].find("**") {
100                let end = i + rel;
101                out.push_str("<b>");
102                out.push_str(&escaped[start..end]);
103                out.push_str("</b>");
104                i = end + 2;
105            } else {
106                out.push_str("**");
107            }
108            continue;
109        }
110
111        // Strikethrough: ~~
112        if escaped.get(i..i + 2) == Some("~~") {
113            i += 2;
114            let start = i;
115            if let Some(rel) = escaped[i..].find("~~") {
116                let end = i + rel;
117                out.push_str("<s>");
118                out.push_str(&escaped[start..end]);
119                out.push_str("</s>");
120                i = end + 2;
121            } else {
122                out.push_str("~~");
123            }
124            continue;
125        }
126
127        // Italic: * (single)
128        if escaped.get(i..i + 1) == Some("*") {
129            i += 1;
130            let start = i;
131            if let Some(rel) = escaped[i..].find('*') {
132                let end = i + rel;
133                out.push_str("<i>");
134                out.push_str(&escaped[start..end]);
135                out.push_str("</i>");
136                i = end + 1;
137            } else {
138                out.push('*');
139            }
140            continue;
141        }
142
143        // Italic: _text_
144        if escaped.get(i..i + 1) == Some("_") {
145            i += 1;
146            let start = i;
147            if let Some(rel) = escaped[i..].find('_') {
148                let end = i + rel;
149                out.push_str("<i>");
150                out.push_str(&escaped[start..end]);
151                out.push_str("</i>");
152                i = end + 1;
153            } else {
154                out.push('_');
155            }
156            continue;
157        }
158
159        // Link: [text](url)
160        if escaped.get(i..i + 1) == Some("[") {
161            if let Some(close_bracket_rel) = escaped[i..].find("](") {
162                let text_end = i + close_bracket_rel;
163                let url_start = text_end + 2;
164                if let Some(close_paren_rel) = escaped[url_start..].find(')') {
165                    let url_end = url_start + close_paren_rel;
166                    let link_text = &escaped[i + 1..text_end];
167                    let url = &escaped[url_start..url_end];
168                    out.push_str("<a href=\"");
169                    out.push_str(url);
170                    out.push_str("\">");
171                    out.push_str(link_text);
172                    out.push_str("</a>");
173                    i = url_end + 1;
174                    continue;
175                }
176            }
177        }
178
179        // Regular char
180        let ch = escaped[i..].chars().next().unwrap_or(' ');
181        out.push(ch);
182        i += ch.len_utf8();
183    }
184
185    out
186}
187
/// Convert Markdown to Slack mrkdwn format.
///
/// The text is scanned left to right and rewritten as follows:
///
/// * fenced (triple-backtick) blocks and single-backtick inline code are
///   copied through verbatim, so Markdown-looking text inside code is left
///   alone;
/// * `**bold**` → `*bold*`;
/// * `~~strike~~` → `~strike~`;
/// * `[label](url)` → `<url|label>`;
/// * a line starting with one or more `#` followed by a space → `*heading*`
///   (the heading level is discarded).
///
/// Everything else is copied unchanged. An unclosed `**`, `~~` or backtick is
/// emitted literally.
///
/// NOTE(review): `&`, `<` and `>` in the input are not escaped here; confirm
/// whether callers are expected to do that before sending to Slack.
pub fn markdown_to_slack(text: impl AsRef<str>) -> String {
    // (markdown delimiter, Slack delimiter).
    const SPANS: [(&str, char); 2] = [("**", '*'), ("~~", '~')];

    // Byte offset in `s` of the `close` that balances an `open` the caller has
    // already consumed, honouring nested `open`/`close` pairs.
    fn balanced_close(s: &str, open: char, close: char) -> Option<usize> {
        let mut depth = 1usize;
        for (idx, ch) in s.char_indices() {
            if ch == open {
                depth += 1;
            } else if ch == close {
                depth -= 1;
                if depth == 0 {
                    return Some(idx);
                }
            }
        }
        None
    }

    // Parse `[label](url)` at the very start of `s`.
    // Returns (label, url, total bytes consumed).
    fn parse_link(s: &str) -> Option<(&str, &str, usize)> {
        let inner = s.strip_prefix('[')?;
        // The `]` must be the one that closes *this* `[`, and `(` must follow
        // it immediately; a `](` further along belongs to a different link.
        let label_len = balanced_close(inner, '[', ']')?;
        let target = inner[label_len + 1..].strip_prefix('(')?;
        // Prefer the balancing `)` so URLs containing parentheses survive;
        // fall back to the first `)` when parentheses are unbalanced.
        let url_len = balanced_close(target, '(', ')').or_else(|| target.find(')'))?;
        // 4 = `[` + `]` + `(` + `)`.
        Some((&inner[..label_len], &target[..url_len], label_len + url_len + 4))
    }

    let text = text.as_ref();
    let bytes = text.as_bytes();
    let mut out = String::with_capacity(text.len());
    // Byte offset into `text`; always kept on a char boundary.
    let mut i = 0;

    while i < text.len() {
        let rest = &text[i..];

        // Fenced code block: copy verbatim, fences included.
        if let Some(after) = rest.strip_prefix("```") {
            let consumed = match after.find("```") {
                Some(len) => len + 6,
                None => 3, // unclosed fence: just the three backticks
            };
            out.push_str(&rest[..consumed]);
            i += consumed;
            continue;
        }

        // Inline code: copy verbatim, backticks included. An unclosed
        // backtick falls through and is emitted as a regular character.
        if let Some(after) = rest.strip_prefix('`') {
            if let Some(len) = after.find('`') {
                out.push_str(&rest[..len + 2]);
                i += len + 2;
                continue;
            }
        }

        // Bold: **text** → *text*; strikethrough: ~~text~~ → ~text~
        let span = SPANS
            .iter()
            .find_map(|&(delim, mark)| rest.strip_prefix(delim).map(|after| (delim, mark, after)));
        if let Some((delim, mark, after)) = span {
            match after.find(delim) {
                Some(len) => {
                    out.push(mark);
                    out.push_str(&after[..len]);
                    out.push(mark);
                    i += len + 2 * delim.len();
                }
                None => {
                    out.push_str(delim);
                    i += delim.len();
                }
            }
            continue;
        }

        // Link: [text](url) → <url|text>
        if let Some((label, url, consumed)) = parse_link(rest) {
            out.push('<');
            out.push_str(url);
            out.push('|');
            out.push_str(label);
            out.push('>');
            i += consumed;
            continue;
        }

        // Header: # at line start → *Header*
        let at_line_start = i == 0 || bytes[i - 1] == b'\n';
        if at_line_start && rest.starts_with('#') {
            // Heading level is irrelevant: every level is flattened to bold.
            if let Some(title) = rest.trim_start_matches('#').strip_prefix(' ') {
                let line_len = title.find('\n').unwrap_or(title.len());
                out.push('*');
                out.push_str(&title[..line_len]);
                out.push('*');
                // `title` is a suffix of `text`; resume at the newline (or end).
                i = text.len() - title.len() + line_len;
                continue;
            }
        }

        // Regular char
        match rest.chars().next() {
            Some(ch) => {
                out.push(ch);
                i += ch.len_utf8();
            }
            None => break,
        }
    }

    out
}
279
/// Return the greatest byte index `<= pos` (clamped to `s.len()`) that lies on
/// a UTF-8 character boundary of `s`.
fn floor_char_boundary(s: &str, pos: usize) -> usize {
    let upper = pos.min(s.len());
    // Index 0 is always a boundary, so the search cannot come up empty.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
288
289/// Convert markdown to Telegram HTML then split into chunks of at most `max_len` bytes.
290pub fn chunk_markdown_html(md: impl AsRef<str>, max_len: usize) -> Vec<String> {
291    let html = markdown_to_telegram_html(md);
292    if html.len() <= max_len {
293        return vec![html];
294    }
295
296    let mut chunks = Vec::new();
297    let mut current = String::new();
298
299    for line in html.split('\n') {
300        let with_newline = if current.is_empty() {
301            line.to_string()
302        } else {
303            format!("\n{}", line)
304        };
305
306        if current.len() + with_newline.len() > max_len {
307            if !current.is_empty() {
308                chunks.push(current.clone());
309                current = line.to_string();
310            } else {
311                // Single line exceeds max_len, force split
312                let mut pos = 0;
313                while pos < line.len() {
314                    let end = floor_char_boundary(line, pos + max_len);
315                    let end = if end <= pos { pos + 1 } else { end };
316                    let end = end.min(line.len());
317                    chunks.push(line[pos..end].to_string());
318                    pos = end;
319                }
320            }
321        } else {
322            current.push_str(&with_newline);
323        }
324    }
325
326    if !current.is_empty() {
327        chunks.push(current);
328    }
329
330    chunks
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    // --- escape_html -------------------------------------------------------

    #[test]
    fn test_escape_html() {
        let cases = [("a & b", "a &amp; b"), ("<tag>", "&lt;tag&gt;")];
        for &(input, expected) in &cases {
            assert_eq!(escape_html(input), expected);
        }
    }

    // --- markdown_to_telegram_html -----------------------------------------

    #[test]
    fn test_telegram_bold() {
        assert_eq!(markdown_to_telegram_html("**hello**"), "<b>hello</b>");
    }

    #[test]
    fn test_telegram_italic_star() {
        assert_eq!(markdown_to_telegram_html("*hello*"), "<i>hello</i>");
    }

    #[test]
    fn test_telegram_strike() {
        assert_eq!(markdown_to_telegram_html("~~hello~~"), "<s>hello</s>");
    }

    #[test]
    fn test_telegram_inline_code() {
        assert_eq!(markdown_to_telegram_html("`code`"), "<code>code</code>");
    }

    #[test]
    fn test_telegram_link() {
        let html = markdown_to_telegram_html("[click](https://example.com)");
        assert_eq!(html, "<a href=\"https://example.com\">click</a>");
    }

    #[test]
    fn test_telegram_html_escape() {
        assert!(markdown_to_telegram_html("a & b").contains("&amp;"));
    }

    // --- markdown_to_slack --------------------------------------------------

    #[test]
    fn test_slack_bold() {
        let converted = markdown_to_slack("**hello**");
        assert_eq!(converted, "*hello*");
    }

    #[test]
    fn test_slack_strike() {
        let converted = markdown_to_slack("~~hello~~");
        assert_eq!(converted, "~hello~");
    }

    #[test]
    fn test_slack_link() {
        let converted = markdown_to_slack("[click](https://example.com)");
        assert_eq!(converted, "<https://example.com|click>");
    }

    #[test]
    fn test_slack_header() {
        let converted = markdown_to_slack("# Hello");
        assert_eq!(converted, "*Hello*");
    }

    // --- chunk_markdown_html ------------------------------------------------

    #[test]
    fn test_chunk_small() {
        let parts = chunk_markdown_html("hello", 100);
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0], "hello");
    }

    #[test]
    fn test_chunk_split() {
        let limit = 8;
        let parts = chunk_markdown_html("line1\nline2\nline3", limit);
        assert!(parts.len() > 1);
        assert!(parts.iter().all(|part| part.len() <= limit));
    }
}