Skip to main content

layer_client/
parsers.rs

1//! Text formatting parsers: HTML and Markdown ↔ Telegram [`MessageEntity`]
2//!
3//! # Markdown (Telegram-flavoured)
4//! Supported: `**bold**`, `__italic__`, `~~strike~~`, `||spoiler||`, `` `code` ``,
5//! ` ```lang\npre``` `, `[text](url)`, `[text](tg://user?id=123)`
6//!
7//! # HTML
8//! Supported tags: `<b>`, `<strong>`, `<i>`, `<em>`, `<u>`, `<s>`, `<del>`,
9//! `<code>`, `<pre>`, `<tg-spoiler>`, `<a href="url">`,
10//! `<tg-emoji emoji-id="id">text</tg-emoji>`
11//!
12//! # Feature gates
13//! * `html`      — enables `parse_html` / `generate_html` via the built-in hand-rolled
14//!   parser (zero extra deps).
15//! * `html5ever` — replaces `parse_html` with a spec-compliant html5ever tokenizer.
16//!   `generate_html` is always the same hand-rolled generator.
17
18use layer_tl_types as tl;
19
20// ─── Markdown ─────────────────────────────────────────────────────────────────
21
22/// Parse Telegram-flavoured markdown into (plain_text, entities).
23pub fn parse_markdown(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
24    let mut out   = String::with_capacity(text.len());
25    let mut ents  = Vec::new();
26    let chars: Vec<char> = text.chars().collect();
27    let n = chars.len();
28    let mut i = 0;
29    let mut open_stack: Vec<(MarkdownTag, i32)> = Vec::new();
30    let mut utf16_off: i32 = 0;
31
32    macro_rules! push_char {
33        ($c:expr) => {{ let c: char = $c; out.push(c); utf16_off += c.len_utf16() as i32; }};
34    }
35
36    while i < n {
37        // ── code block ```lang\n...``` ──────────────────────────────────────
38        if i + 2 < n && chars[i] == '`' && chars[i+1] == '`' && chars[i+2] == '`' {
39            let start = i + 3;
40            let mut j = start;
41            while j + 2 < n {
42                if chars[j] == '`' && chars[j+1] == '`' && chars[j+2] == '`' { break; }
43                j += 1;
44            }
45            if j + 2 < n {
46                let block: String = chars[start..j].iter().collect();
47                let (lang, code) = if let Some(nl) = block.find('\n') {
48                    (block[..nl].trim().to_string(), block[nl+1..].to_string())
49                } else { (String::new(), block) };
50                let code_off = utf16_off;
51                let code_utf16: i32 = code.encode_utf16().count() as i32;
52                ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
53                    offset: code_off, length: code_utf16, language: lang,
54                }));
55                for c in code.chars() { push_char!(c); }
56                i = j + 3;
57                continue;
58            }
59        }
60
61        // ── inline code ─────────────────────────────────────────────────────
62        if chars[i] == '`' {
63            let start = i + 1;
64            let mut j = start;
65            while j < n && chars[j] != '`' { j += 1; }
66            if j < n {
67                let code: String = chars[start..j].iter().collect();
68                let code_off = utf16_off;
69                let code_utf16: i32 = code.encode_utf16().count() as i32;
70                ents.push(tl::enums::MessageEntity::Code(tl::types::MessageEntityCode {
71                    offset: code_off, length: code_utf16,
72                }));
73                for c in code.chars() { push_char!(c); }
74                i = j + 1;
75                continue;
76            }
77        }
78
79        // ── [text](url) ─────────────────────────────────────────────────────
80        if chars[i] == '[' {
81            let text_start = i + 1;
82            let mut j = text_start;
83            let mut depth = 1i32;
84            while j < n {
85                if chars[j] == '[' { depth += 1; }
86                if chars[j] == ']' { depth -= 1; if depth == 0 { break; } }
87                j += 1;
88            }
89            if j < n && j + 1 < n && chars[j+1] == '(' {
90                let link_start = j + 2;
91                let mut k = link_start;
92                while k < n && chars[k] != ')' { k += 1; }
93                if k < n {
94                    let inner_text: String = chars[text_start..j].iter().collect();
95                    let url: String = chars[link_start..k].iter().collect();
96                    const MENTION_PFX: &str = "tg://user?id=";
97                    let ent_off = utf16_off;
98                    for c in inner_text.chars() { push_char!(c); }
99                    let ent_len = utf16_off - ent_off;
100                    if let Some(stripped) = url.strip_prefix(MENTION_PFX) {
101                        if let Ok(uid) = stripped.parse::<i64>() {
102                            ents.push(tl::enums::MessageEntity::MentionName(
103                                tl::types::MessageEntityMentionName { offset: ent_off, length: ent_len, user_id: uid }
104                            ));
105                        }
106                    } else {
107                        ents.push(tl::enums::MessageEntity::TextUrl(
108                            tl::types::MessageEntityTextUrl { offset: ent_off, length: ent_len, url }
109                        ));
110                    }
111                    i = k + 1;
112                    continue;
113                }
114            }
115        }
116
117        // ── two-char delimiters ──────────────────────────────────────────────
118        let two: Option<(&str, MarkdownTag)> = if i + 1 < n {
119            match [chars[i], chars[i+1]] {
120                ['*','*'] => Some(("**", MarkdownTag::Bold)),
121                ['_','_'] => Some(("__", MarkdownTag::Italic)),
122                ['~','~'] => Some(("~~", MarkdownTag::Strike)),
123                ['|','|'] => Some(("||", MarkdownTag::Spoiler)),
124                _         => None,
125            }
126        } else { None };
127
128        if let Some((_delim, tag)) = two {
129            if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
130                let (_, start_off) = open_stack.remove(pos);
131                let length = utf16_off - start_off;
132                let entity = match tag {
133                    MarkdownTag::Bold    => tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset: start_off, length }),
134                    MarkdownTag::Italic  => tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset: start_off, length }),
135                    MarkdownTag::Strike  => tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset: start_off, length }),
136                    MarkdownTag::Spoiler => tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset: start_off, length }),
137                };
138                if length > 0 { ents.push(entity); }
139            } else {
140                open_stack.push((tag, utf16_off));
141            }
142            i += 2;
143            continue;
144        }
145
146        push_char!(chars[i]);
147        i += 1;
148    }
149
150    (out, ents)
151}
152
/// The paired two-character markdown delimiters tracked while parsing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MarkdownTag {
    /// `**bold**`
    Bold,
    /// `__italic__`
    Italic,
    /// `~~strike~~`
    Strike,
    /// `||spoiler||`
    Spoiler,
}
155
156/// Generate Telegram markdown from plain text + entities.
157pub fn generate_markdown(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
158    use tl::enums::MessageEntity as ME;
159    let mut insertions: Vec<(i32, &'static str)> = Vec::new();
160    for ent in entities {
161        match ent {
162            ME::Bold(e)    => { insertions.push((e.offset, "**")); insertions.push((e.offset+e.length, "**")); }
163            ME::Italic(e)  => { insertions.push((e.offset, "__")); insertions.push((e.offset+e.length, "__")); }
164            ME::Strike(e)  => { insertions.push((e.offset, "~~")); insertions.push((e.offset+e.length, "~~")); }
165            ME::Spoiler(e) => { insertions.push((e.offset, "||")); insertions.push((e.offset+e.length, "||")); }
166            ME::Code(e)    => { insertions.push((e.offset, "`"));  insertions.push((e.offset+e.length, "`")); }
167            _ => {}
168        }
169    }
170    insertions.sort_by_key(|&(pos, _)| pos);
171
172    let mut result = String::with_capacity(text.len() + insertions.len() * 4);
173    let mut ins_idx = 0;
174    let mut utf16_pos: i32 = 0;
175    for ch in text.chars() {
176        while ins_idx < insertions.len() && insertions[ins_idx].0 <= utf16_pos {
177            result.push_str(insertions[ins_idx].1);
178            ins_idx += 1;
179        }
180        result.push(ch);
181        utf16_pos += ch.len_utf16() as i32;
182    }
183    while ins_idx < insertions.len() { result.push_str(insertions[ins_idx].1); ins_idx += 1; }
184    result
185}
186
187// ─── HTML parser — built-in hand-rolled (no extra deps) ──────────────────────
188// Compiled when `html5ever` feature is NOT active.
189
190/// Parse a Telegram-compatible HTML string into (plain_text, entities).
191///
192/// Hand-rolled, zero-dependency implementation.  Override with the
193/// `html5ever` Cargo feature for a spec-compliant tokenizer.
194#[cfg(not(feature = "html5ever"))]
195pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
196    let mut out        = String::with_capacity(html.len());
197    let mut ents       = Vec::new();
198    let mut stack: Vec<(HtmlTag, i32, Option<String>)> = Vec::new();
199    let mut utf16_off: i32 = 0;
200
201    let bytes = html.as_bytes();
202    let len   = bytes.len();
203    let mut i = 0;
204
205    while i < len {
206        if bytes[i] == b'<' {
207            let tag_start = i + 1;
208            let mut j = tag_start;
209            while j < len && bytes[j] != b'>' { j += 1; }
210            let tag_content = &html[tag_start..j];
211            i = j + 1;
212
213            let is_close = tag_content.starts_with('/');
214            let tag_str  = if is_close { tag_content[1..].trim() } else { tag_content.trim() };
215            let (tag_name, attrs) = parse_tag(tag_str);
216
217            if is_close {
218                if let Some(pos) = stack.iter().rposition(|(t, _, _)| t.name() == tag_name) {
219                    let (htag, start_off, extra) = stack.remove(pos);
220                    let length = utf16_off - start_off;
221                    if length > 0 {
222                        let entity = match htag {
223                            HtmlTag::Bold    => Some(tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset: start_off, length })),
224                            HtmlTag::Italic  => Some(tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset: start_off, length })),
225                            HtmlTag::Underline => Some(tl::enums::MessageEntity::Underline(tl::types::MessageEntityUnderline { offset: start_off, length })),
226                            HtmlTag::Strike  => Some(tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset: start_off, length })),
227                            HtmlTag::Spoiler => Some(tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset: start_off, length })),
228                            HtmlTag::Code    => Some(tl::enums::MessageEntity::Code(tl::types::MessageEntityCode { offset: start_off, length })),
229                            HtmlTag::Pre     => Some(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre { offset: start_off, length, language: extra.unwrap_or_default() })),
230                            HtmlTag::Link(url) => {
231                                const PFX: &str = "tg://user?id=";
232                                if let Some(stripped) = url.strip_prefix(PFX) {
233                                    stripped.parse::<i64>().ok().map(|uid|
234                                        tl::enums::MessageEntity::MentionName(tl::types::MessageEntityMentionName { offset: start_off, length, user_id: uid }))
235                                } else {
236                                    Some(tl::enums::MessageEntity::TextUrl(tl::types::MessageEntityTextUrl { offset: start_off, length, url }))
237                                }
238                            }
239                            HtmlTag::CustomEmoji(id) => Some(tl::enums::MessageEntity::CustomEmoji(tl::types::MessageEntityCustomEmoji { offset: start_off, length, document_id: id })),
240                            HtmlTag::Unknown => None,
241                        };
242                        if let Some(e) = entity { ents.push(e); }
243                    }
244                }
245            } else {
246                let htag = match tag_name {
247                    "b" | "strong"         => HtmlTag::Bold,
248                    "i" | "em"             => HtmlTag::Italic,
249                    "u"                    => HtmlTag::Underline,
250                    "s" | "del" | "strike" => HtmlTag::Strike,
251                    "tg-spoiler"           => HtmlTag::Spoiler,
252                    "code"                 => HtmlTag::Code,
253                    "pre"                  => HtmlTag::Pre,
254                    "a" => HtmlTag::Link(attrs.iter().find(|(k, _)| k == "href").map(|(_, v)| v.clone()).unwrap_or_default()),
255                    "tg-emoji" => HtmlTag::CustomEmoji(attrs.iter().find(|(k, _)| k == "emoji-id").and_then(|(_, v)| v.parse::<i64>().ok()).unwrap_or(0)),
256                    "br" => { out.push('\n'); utf16_off += 1; continue; }
257                    _ => HtmlTag::Unknown,
258                };
259                stack.push((htag, utf16_off, None));
260            }
261        } else {
262            let text_start = i;
263            while i < len && bytes[i] != b'<' { i += 1; }
264            let decoded = decode_html_entities(&html[text_start..i]);
265            for ch in decoded.chars() { out.push(ch); utf16_off += ch.len_utf16() as i32; }
266        }
267    }
268
269    (out, ents)
270}
271
/// Decode HTML character references in `s`.
///
/// Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`
/// and `&nbsp;`, plus decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric
/// references.  Anything else — unknown names, malformed or out-of-range
/// numbers, a bare `&` — is copied through unchanged.
///
/// The input is decoded in a single left-to-right pass, so already-decoded
/// output is never re-interpreted: `&amp;lt;` becomes `&lt;`, not `<`.
#[cfg(not(feature = "html5ever"))]
fn decode_html_entities(s: &str) -> String {
    /// Longest reference body (the part between `&` and `;`) worth looking
    /// for; bounds the `;` search so stray ampersands stay cheap.
    const MAX_REF_LEN: usize = 10;

    /// Map a reference body such as `amp` or `#x41` to its character.
    fn decode_reference(name: &str) -> Option<char> {
        match name {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            "nbsp" => Some('\u{00A0}'),
            _ => {
                let digits = name.strip_prefix('#')?;
                let hex = digits.strip_prefix('x').or_else(|| digits.strip_prefix('X'));
                let code = match hex {
                    Some(h) if !h.is_empty() && h.bytes().all(|b| b.is_ascii_hexdigit()) => {
                        u32::from_str_radix(h, 16).ok()?
                    }
                    None if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()) => {
                        digits.parse::<u32>().ok()?
                    }
                    _ => return None,
                };
                if code == 0 {
                    return None;
                }
                char::from_u32(code)
            }
        }
    }

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];
        // `;` is ASCII, so its byte position is a valid slice boundary.
        let decoded = after
            .bytes()
            .take(MAX_REF_LEN + 1)
            .position(|b| b == b';')
            .and_then(|semi| decode_reference(&after[..semi]).map(|ch| (ch, semi)));
        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &after[semi + 1..];
            }
            None => {
                out.push('&');
                rest = after;
            }
        }
    }
    out.push_str(rest);
    out
}
277
278#[cfg(not(feature = "html5ever"))]
279fn parse_tag(s: &str) -> (&str, Vec<(String, String)>) {
280    let mut parts = s.splitn(2, char::is_whitespace);
281    let name  = parts.next().unwrap_or("").trim_end_matches('/');
282    let attrs = parse_attrs(parts.next().unwrap_or(""));
283    (name, attrs)
284}
285
/// Parse an HTML attribute string such as `href="x" class='y' id=z` into
/// `(name, value)` pairs, in source order.
///
/// * Values may be double-quoted, single-quoted or unquoted (unquoted values
///   end at the next whitespace).
/// * An unterminated quoted value extends to the end of the input.
/// * Attributes without a value (`disabled`) are skipped; they do not leak
///   into the name of the following attribute.
///
/// Never panics, whatever the input.
#[cfg(not(feature = "html5ever"))]
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut result = Vec::new();
    let mut rem = s.trim();
    while let Some(eq) = rem.find('=') {
        // The name is the last whitespace-delimited token before `=`;
        // anything earlier is a valueless attribute.
        let key = rem[..eq].split_whitespace().next_back().unwrap_or("").to_string();
        rem = rem[eq + 1..].trim_start();
        let (val, rest) = match rem.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                // Quote characters are ASCII, so index 1 is a char boundary.
                let body = &rem[1..];
                match body.find(quote) {
                    Some(end) => (&body[..end], &body[end + 1..]),
                    None => (body, ""),
                }
            }
            _ => {
                let end = rem.find(char::is_whitespace).unwrap_or(rem.len());
                (&rem[..end], &rem[end..])
            }
        };
        result.push((key, val.to_string()));
        rem = rest.trim_start();
    }
    result
}
310
/// Formatting tags tracked on the open-tag stack of the hand-rolled HTML
/// parser.
#[cfg(not(feature = "html5ever"))]
#[allow(dead_code)]
#[derive(Clone, Debug)]
enum HtmlTag {
    Bold,
    Italic,
    Underline,
    Strike,
    Spoiler,
    Code,
    Pre,
    /// Anchor tag carrying a `String` payload.
    Link(String),
    /// Custom emoji tag carrying an `i64` payload.
    CustomEmoji(i64),
    /// Any tag the parser does not recognise.
    Unknown,
}

#[cfg(not(feature = "html5ever"))]
impl HtmlTag {
    /// Canonical tag name for this variant (`""` for [`HtmlTag::Unknown`]).
    fn name(&self) -> &str {
        match *self {
            HtmlTag::Bold => "b",
            HtmlTag::Italic => "i",
            HtmlTag::Underline => "u",
            HtmlTag::Strike => "s",
            HtmlTag::Spoiler => "tg-spoiler",
            HtmlTag::Code => "code",
            HtmlTag::Pre => "pre",
            HtmlTag::Link(_) => "a",
            HtmlTag::CustomEmoji(_) => "tg-emoji",
            HtmlTag::Unknown => "",
        }
    }
}
336
337// ─── HTML parser — html5ever backend ─────────────────────────────────────────
338// Compiled when `html5ever` feature IS active; overrides the built-in parser.
339
/// Parse a Telegram-compatible HTML string into (plain_text, entities).
///
/// Uses the [`html5ever`] spec-compliant tokenizer.  Enable the `html5ever`
/// Cargo feature to activate this implementation.
///
/// An entity is pushed with `length == 0` when its start tag is seen and gets
/// its real length when the matching end tag arrives; `length == 0` therefore
/// marks an entity as "still open".  End tags only ever close an *open*
/// entity, so nested tags of the same kind and stray end tags cannot corrupt
/// entities that were already closed.  Elements that turn out empty, and
/// tags that are never closed, produce no entity.
///
/// Entity offsets and lengths are counted in UTF-16 code units of the
/// returned plain text.
#[cfg(feature = "html5ever")]
pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    use std::cell::Cell;
    use html5ever::tendril::StrTendril;
    use html5ever::tokenizer::{
        BufferQueue, Tag, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer,
    };

    // `process_token` takes `&self`, so the accumulated state lives in cells.
    struct Sink {
        text:     Cell<String>,
        entities: Cell<Vec<tl::enums::MessageEntity>>,
        offset:   Cell<i32>,
    }

    impl TokenSink for Sink {
        type Handle = ();

        fn process_token(&self, token: Token, _line: u64) -> TokenSinkResult<()> {
            // Move the state out of the cells; it is written back at the end.
            let mut text     = self.text.take();
            let mut entities = self.entities.take();
            let mut offset   = self.offset.get();

            // Close the most-recent *open* entity of `$kind` (open = length==0).
            // Removes the entity if start == end (zero-length element).
            macro_rules! close_ent {
                ($kind:ident) => {{
                    if let Some(idx) = entities.iter().rposition(|e|
                        matches!(e, tl::enums::MessageEntity::$kind(inner) if inner.length == 0))
                    {
                        let closed_len = {
                            if let tl::enums::MessageEntity::$kind(ref mut inner) = entities[idx] {
                                inner.length = offset - inner.offset;
                                inner.length
                            } else { unreachable!() }
                        };
                        if closed_len == 0 { entities.remove(idx); }
                    }
                }};
            }

            match token {
                // ── Start tags ───────────────────────────────────────────────
                Token::TagToken(Tag { kind: TagKind::StartTag, name, attrs, .. }) => {
                    let len0 = 0i32;
                    match name.as_ref() {
                        "b" | "strong" =>
                            entities.push(tl::enums::MessageEntity::Bold(
                                tl::types::MessageEntityBold { offset, length: len0 })),
                        "i" | "em" =>
                            entities.push(tl::enums::MessageEntity::Italic(
                                tl::types::MessageEntityItalic { offset, length: len0 })),
                        "u" =>
                            entities.push(tl::enums::MessageEntity::Underline(
                                tl::types::MessageEntityUnderline { offset, length: len0 })),
                        "s" | "del" | "strike" =>
                            entities.push(tl::enums::MessageEntity::Strike(
                                tl::types::MessageEntityStrike { offset, length: len0 })),
                        "tg-spoiler" =>
                            entities.push(tl::enums::MessageEntity::Spoiler(
                                tl::types::MessageEntitySpoiler { offset, length: len0 })),
                        "code" => {
                            // Inside an open <pre>? Annotate language on the pre entity.
                            let in_pre = entities.last().map_or(false, |e| {
                                matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0)
                            });
                            if in_pre {
                                let lang = attrs.iter()
                                    .find(|a| a.name.local.as_ref() == "class")
                                    .and_then(|a| {
                                        let v: &str = a.value.as_ref();
                                        v.strip_prefix("language-")
                                    })
                                    .map(|s| s.to_string())
                                    .unwrap_or_default();
                                if let Some(tl::enums::MessageEntity::Pre(ref mut p)) = entities.last_mut() {
                                    p.language = lang;
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::Code(
                                    tl::types::MessageEntityCode { offset, length: len0 }));
                            }
                        }
                        "pre" =>
                            entities.push(tl::enums::MessageEntity::Pre(
                                tl::types::MessageEntityPre { offset, length: len0, language: String::new() })),
                        "a" => {
                            let href = attrs.iter()
                                .find(|a| a.name.local.as_ref() == "href")
                                .map(|a| { let v: &str = a.value.as_ref(); v.to_string() })
                                .unwrap_or_default();
                            const MENTION_PFX: &str = "tg://user?id=";
                            if href.starts_with(MENTION_PFX) {
                                // A mention link with an unparsable id yields no entity.
                                if let Ok(uid) = href[MENTION_PFX.len()..].parse::<i64>() {
                                    entities.push(tl::enums::MessageEntity::MentionName(
                                        tl::types::MessageEntityMentionName { offset, length: len0, user_id: uid }));
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::TextUrl(
                                    tl::types::MessageEntityTextUrl { offset, length: len0, url: href }));
                            }
                        }
                        "tg-emoji" => {
                            let doc_id = attrs.iter()
                                .find(|a| a.name.local.as_ref() == "emoji-id")
                                .and_then(|a| { let v: &str = a.value.as_ref(); v.parse::<i64>().ok() })
                                .unwrap_or(0);
                            entities.push(tl::enums::MessageEntity::CustomEmoji(
                                tl::types::MessageEntityCustomEmoji { offset, length: len0, document_id: doc_id }));
                        }
                        "br" => { text.push('\n'); offset += 1; }
                        _ => {}
                    }
                }

                // ── End tags ─────────────────────────────────────────────────
                Token::TagToken(Tag { kind: TagKind::EndTag, name, .. }) => {
                    match name.as_ref() {
                        "b" | "strong"         => close_ent!(Bold),
                        "i" | "em"             => close_ent!(Italic),
                        "u"                    => close_ent!(Underline),
                        "s" | "del" | "strike" => close_ent!(Strike),
                        "tg-spoiler"           => close_ent!(Spoiler),
                        "code" => {
                            // Inside open <pre>: pre absorbs the code tag.
                            let in_pre = entities.last().map_or(false, |e| {
                                matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0)
                            });
                            if !in_pre { close_ent!(Code); }
                        }
                        "pre"      => close_ent!(Pre),
                        "a" => {
                            // Both link flavours come from <a>; close whichever
                            // was opened most recently and is still open, even
                            // when other entities were pushed after it.
                            let open_link_is_mention = entities.iter().rev().find_map(|e| match e {
                                tl::enums::MessageEntity::MentionName(m) if m.length == 0 => Some(true),
                                tl::enums::MessageEntity::TextUrl(t) if t.length == 0 => Some(false),
                                _ => None,
                            });
                            match open_link_is_mention {
                                Some(true)  => close_ent!(MentionName),
                                Some(false) => close_ent!(TextUrl),
                                None        => {}
                            }
                        }
                        "tg-emoji" => close_ent!(CustomEmoji),
                        _ => {}
                    }
                }

                // ── Text content ─────────────────────────────────────────────
                Token::CharacterTokens(s) => {
                    let s_str: &str = s.as_ref();
                    offset += s_str.encode_utf16().count() as i32;
                    text.push_str(s_str);
                }

                _ => {}
            }

            self.text.replace(text);
            self.entities.replace(entities);
            self.offset.replace(offset);
            TokenSinkResult::Continue
        }
    }

    let mut input = BufferQueue::default();
    input.push_back(StrTendril::from_slice(html).try_reinterpret().unwrap());

    let tok = Tokenizer::new(
        Sink {
            text:     Cell::new(String::with_capacity(html.len())),
            entities: Cell::new(Vec::new()),
            offset:   Cell::new(0),
        },
        Default::default(),
    );
    let _ = tok.feed(&mut input);
    tok.end();

    let Sink { text, entities, .. } = tok.sink;
    let mut entities = entities.take();
    // Anything still at length 0 belongs to a tag that was never closed.
    entities.retain(|e| {
        let length = match e {
            tl::enums::MessageEntity::Bold(x)        => x.length,
            tl::enums::MessageEntity::Italic(x)      => x.length,
            tl::enums::MessageEntity::Underline(x)   => x.length,
            tl::enums::MessageEntity::Strike(x)      => x.length,
            tl::enums::MessageEntity::Spoiler(x)     => x.length,
            tl::enums::MessageEntity::Code(x)        => x.length,
            tl::enums::MessageEntity::Pre(x)         => x.length,
            tl::enums::MessageEntity::TextUrl(x)     => x.length,
            tl::enums::MessageEntity::MentionName(x) => x.length,
            tl::enums::MessageEntity::CustomEmoji(x) => x.length,
            _ => return true,
        };
        length > 0
    });
    (text.take(), entities)
}
519
520// ─── HTML generator (always available, no html5ever dependency) ───────────────
521
522/// Generate Telegram-compatible HTML from plain text + entities.
523pub fn generate_html(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
524    use tl::enums::MessageEntity as ME;
525
526    let mut markers: Vec<(i32, bool, String)> = Vec::new();
527
528    for ent in entities {
529        let (off, len, open, close) = match ent {
530            ME::Bold(e)        => (e.offset, e.length, "<b>".into(),           "</b>".into()),
531            ME::Italic(e)      => (e.offset, e.length, "<i>".into(),           "</i>".into()),
532            ME::Underline(e)   => (e.offset, e.length, "<u>".into(),           "</u>".into()),
533            ME::Strike(e)      => (e.offset, e.length, "<s>".into(),           "</s>".into()),
534            ME::Spoiler(e)     => (e.offset, e.length, "<tg-spoiler>".into(),  "</tg-spoiler>".into()),
535            ME::Code(e)        => (e.offset, e.length, "<code>".into(),        "</code>".into()),
536            ME::Pre(e) => {
537                let lang = if e.language.is_empty() { String::new() }
538                           else { format!(" class=\"language-{}\"", e.language) };
539                (e.offset, e.length, format!("<pre><code{lang}>"), "</code></pre>".into())
540            }
541            ME::TextUrl(e)     => (e.offset, e.length, format!("<a href=\"{}\">", escape_html(&e.url)), "</a>".into()),
542            ME::MentionName(e) => (e.offset, e.length, format!("<a href=\"tg://user?id={}\">", e.user_id), "</a>".into()),
543            ME::CustomEmoji(e) => (e.offset, e.length, format!("<tg-emoji emoji-id=\"{}\">", e.document_id), "</tg-emoji>".into()),
544            _ => continue,
545        };
546        markers.push((off,       true,  open));
547        markers.push((off + len, false, close));
548    }
549
550    markers.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
551        a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
552    });
553
554    let mut result = String::with_capacity(text.len() + markers.iter().map(|(_, _, s)| s.len()).sum::<usize>());
555    let mut marker_idx = 0;
556    let mut utf16_pos: i32 = 0;
557
558    for ch in text.chars() {
559        while marker_idx < markers.len() && markers[marker_idx].0 <= utf16_pos {
560            result.push_str(&markers[marker_idx].2);
561            marker_idx += 1;
562        }
563        match ch {
564            '&' => result.push_str("&amp;"),
565            '<' => result.push_str("&lt;"),
566            '>' => result.push_str("&gt;"),
567            '"' => result.push_str("&quot;"),
568            c   => result.push(c),
569        }
570        utf16_pos += ch.len_utf16() as i32;
571    }
572    while marker_idx < markers.len() {
573        result.push_str(&markers[marker_idx].2);
574        marker_idx += 1;
575    }
576
577    result
578}
579
/// Escape `&`, `<`, `>` and `"` so `s` can be embedded in HTML text or in a
/// double-quoted attribute value.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
583
584// ─── Tests ────────────────────────────────────────────────────────────────────
585
#[cfg(test)]
mod tests {
    use super::*;

    /// `**…**` becomes a bold entity with UTF-16 offset/length of the inner text.
    #[test]
    fn markdown_bold() {
        let (plain, entities) = parse_markdown("Hello **world**!");
        assert_eq!(plain, "Hello world!");
        assert_eq!(entities.len(), 1);
        match &entities[0] {
            tl::enums::MessageEntity::Bold(bold) => {
                assert_eq!(bold.offset, 6);
                assert_eq!(bold.length, 5);
            }
            _ => panic!("expected bold"),
        }
    }

    /// Backticks are stripped and a code entity is produced.
    #[test]
    fn markdown_inline_code() {
        let (plain, entities) = parse_markdown("Use `foo()` to do it");
        assert_eq!(plain, "Use foo() to do it");
        assert!(matches!(entities[0], tl::enums::MessageEntity::Code(_)));
    }

    /// Two sibling tags produce two entities and tag-free text.
    #[test]
    fn html_bold_italic() {
        let (plain, entities) = parse_html("<b>bold</b> and <i>italic</i>");
        assert_eq!(plain, "bold and italic");
        assert_eq!(entities.len(), 2);
    }

    /// `<a href>` becomes a text-URL entity carrying the href.
    #[test]
    fn html_link() {
        let (plain, entities) = parse_html("<a href=\"https://example.com\">click</a>");
        assert_eq!(plain, "click");
        match &entities[0] {
            tl::enums::MessageEntity::TextUrl(link) => {
                assert_eq!(link.url, "https://example.com");
            }
            _ => panic!("expected text url"),
        }
    }

    // HTML entity decoding is a hand-rolled-only feature; html5ever handles it natively.
    #[cfg(not(feature = "html5ever"))]
    #[test]
    fn html_entities_decoded() {
        let (plain, _) = parse_html("A &amp; B &lt;3&gt;");
        assert_eq!(plain, "A & B <3>");
    }

    /// Generated HTML parses back to the original text and entity count.
    #[test]
    fn generate_html_roundtrip() {
        let original = "Hello world";
        let bold = tl::types::MessageEntityBold { offset: 0, length: 5 };
        let entities = vec![tl::enums::MessageEntity::Bold(bold)];

        let html = generate_html(original, &entities);
        assert_eq!(html, "<b>Hello</b> world");

        let (round_tripped, parsed) = parse_html(&html);
        assert_eq!(round_tripped, original);
        assert_eq!(parsed.len(), 1);
    }
}