1use layer_tl_types as tl;
52
53pub fn parse_markdown(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
57 let mut out = String::with_capacity(text.len());
58 let mut ents = Vec::new();
59 let chars: Vec<char> = text.chars().collect();
60 let n = chars.len();
61 let mut i = 0;
62 let mut open_stack: Vec<(MarkdownTag, i32)> = Vec::new();
63 let mut utf16_off: i32 = 0;
64
65 macro_rules! push_char {
66 ($c:expr) => {{
67 let c: char = $c;
68 out.push(c);
69 utf16_off += c.len_utf16() as i32;
70 }};
71 }
72
73 while i < n {
74 if chars[i] == '\\' && i + 1 < n {
76 let next = chars[i + 1];
77 if matches!(
78 next,
79 '*' | '_' | '~' | '|' | '[' | ']' | '(' | ')' | '`' | '\\' | '!'
80 ) {
81 push_char!(next);
82 i += 2;
83 continue;
84 }
85 }
86
87 if i + 2 < n && chars[i] == '`' && chars[i + 1] == '`' && chars[i + 2] == '`' {
89 let start = i + 3;
90 let mut j = start;
91 while j + 2 < n {
92 if chars[j] == '`' && chars[j + 1] == '`' && chars[j + 2] == '`' {
93 break;
94 }
95 j += 1;
96 }
97 if j + 2 < n {
98 let block: String = chars[start..j].iter().collect();
99 let (lang, code) = if let Some(nl) = block.find('\n') {
100 (block[..nl].trim().to_string(), block[nl + 1..].to_string())
101 } else {
102 (String::new(), block)
103 };
104 let code_off = utf16_off;
105 let code_utf16: i32 = code.encode_utf16().count() as i32;
106 ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
107 offset: code_off,
108 length: code_utf16,
109 language: lang,
110 }));
111 for c in code.chars() {
112 push_char!(c);
113 }
114 i = j + 3;
115 continue;
116 }
117 }
118
119 if chars[i] == '`' {
121 let start = i + 1;
122 let mut j = start;
123 while j < n && chars[j] != '`' {
124 j += 1;
125 }
126 if j < n {
127 let code: String = chars[start..j].iter().collect();
128 let code_off = utf16_off;
129 let code_utf16: i32 = code.encode_utf16().count() as i32;
130 ents.push(tl::enums::MessageEntity::Code(
131 tl::types::MessageEntityCode {
132 offset: code_off,
133 length: code_utf16,
134 },
135 ));
136 for c in code.chars() {
137 push_char!(c);
138 }
139 i = j + 1;
140 continue;
141 }
142 }
143
144 if chars[i] == '!' && i + 1 < n && chars[i + 1] == '[' {
146 let text_start = i + 2;
147 let mut j = text_start;
148 while j < n && chars[j] != ']' {
149 j += 1;
150 }
151 if j < n && j + 1 < n && chars[j + 1] == '(' {
152 let link_start = j + 2;
153 let mut k = link_start;
154 while k < n && chars[k] != ')' {
155 k += 1;
156 }
157 if k < n {
158 let inner_text: String = chars[text_start..j].iter().collect();
159 let url: String = chars[link_start..k].iter().collect();
160 const EMOJI_PFX: &str = "tg://emoji?id=";
161 if let Some(stripped) = url.strip_prefix(EMOJI_PFX)
162 && let Ok(doc_id) = stripped.parse::<i64>()
163 {
164 let ent_off = utf16_off;
165 for c in inner_text.chars() {
166 push_char!(c);
167 }
168 ents.push(tl::enums::MessageEntity::CustomEmoji(
169 tl::types::MessageEntityCustomEmoji {
170 offset: ent_off,
171 length: utf16_off - ent_off,
172 document_id: doc_id,
173 },
174 ));
175 i = k + 1;
176 continue;
177 }
178 }
179 }
180 }
181
182 if chars[i] == '[' {
184 let text_start = i + 1;
185 let mut j = text_start;
186 let mut depth = 1i32;
187 while j < n {
188 if chars[j] == '[' {
189 depth += 1;
190 }
191 if chars[j] == ']' {
192 depth -= 1;
193 if depth == 0 {
194 break;
195 }
196 }
197 j += 1;
198 }
199 if j < n && j + 1 < n && chars[j + 1] == '(' {
200 let link_start = j + 2;
201 let mut k = link_start;
202 while k < n && chars[k] != ')' {
203 k += 1;
204 }
205 if k < n {
206 let inner_text: String = chars[text_start..j].iter().collect();
207 let url: String = chars[link_start..k].iter().collect();
208 const MENTION_PFX: &str = "tg://user?id=";
209 let ent_off = utf16_off;
210 for c in inner_text.chars() {
211 push_char!(c);
212 }
213 let ent_len = utf16_off - ent_off;
214 if let Some(stripped) = url.strip_prefix(MENTION_PFX) {
215 if let Ok(uid) = stripped.parse::<i64>() {
216 ents.push(tl::enums::MessageEntity::MentionName(
217 tl::types::MessageEntityMentionName {
218 offset: ent_off,
219 length: ent_len,
220 user_id: uid,
221 },
222 ));
223 }
224 } else {
225 ents.push(tl::enums::MessageEntity::TextUrl(
226 tl::types::MessageEntityTextUrl {
227 offset: ent_off,
228 length: ent_len,
229 url,
230 },
231 ));
232 }
233 i = k + 1;
234 continue;
235 }
236 }
237 }
238
239 let two: Option<MarkdownTag> = if i + 1 < n {
241 match [chars[i], chars[i + 1]] {
242 ['*', '*'] => Some(MarkdownTag::Bold),
243 ['_', '_'] => Some(MarkdownTag::Italic),
244 ['~', '~'] => Some(MarkdownTag::Strike),
245 ['|', '|'] => Some(MarkdownTag::Spoiler),
246 _ => None,
247 }
248 } else {
249 None
250 };
251
252 if let Some(tag) = two {
253 if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
254 let (_, start_off) = open_stack.remove(pos);
255 let length = utf16_off - start_off;
256 if length > 0 {
257 ents.push(make_entity(tag, start_off, length));
258 }
259 } else {
260 open_stack.push((tag, utf16_off));
261 }
262 i += 2;
263 continue;
264 }
265
266 let one: Option<MarkdownTag> = match chars[i] {
269 '*' => Some(MarkdownTag::Bold),
270 '_' => Some(MarkdownTag::Italic),
271 _ => None,
272 };
273
274 if let Some(tag) = one {
275 if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
276 let (_, start_off) = open_stack.remove(pos);
277 let length = utf16_off - start_off;
278 if length > 0 {
279 ents.push(make_entity(tag, start_off, length));
280 }
281 } else {
282 open_stack.push((tag, utf16_off));
283 }
284 i += 1;
285 continue;
286 }
287
288 push_char!(chars[i]);
289 i += 1;
290 }
291
292 (out, ents)
293}
294
295fn make_entity(tag: MarkdownTag, offset: i32, length: i32) -> tl::enums::MessageEntity {
296 match tag {
297 MarkdownTag::Bold => {
298 tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset, length })
299 }
300 MarkdownTag::Italic => {
301 tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset, length })
302 }
303 MarkdownTag::Strike => {
304 tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset, length })
305 }
306 MarkdownTag::Spoiler => {
307 tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset, length })
308 }
309 }
310}
311
/// Inline formatting styles that `parse_markdown` tracks as open/close pairs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkdownTag {
    /// Opened and closed by `**` or `*`.
    Bold,
    /// Opened and closed by `__` or `_`.
    Italic,
    /// Opened and closed by `~~`.
    Strike,
    /// Opened and closed by `||`.
    Spoiler,
}
319
320pub fn generate_markdown(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
325 use tl::enums::MessageEntity as ME;
326
327 let mut insertions: Vec<(i32, bool, String)> = Vec::new();
330
331 for ent in entities {
332 match ent {
333 ME::Bold(e) => {
334 insertions.push((e.offset, true, "**".into()));
335 insertions.push((e.offset + e.length, false, "**".into()));
336 }
337 ME::Italic(e) => {
338 insertions.push((e.offset, true, "__".into()));
339 insertions.push((e.offset + e.length, false, "__".into()));
340 }
341 ME::Strike(e) => {
342 insertions.push((e.offset, true, "~~".into()));
343 insertions.push((e.offset + e.length, false, "~~".into()));
344 }
345 ME::Spoiler(e) => {
346 insertions.push((e.offset, true, "||".into()));
347 insertions.push((e.offset + e.length, false, "||".into()));
348 }
349 ME::Code(e) => {
350 insertions.push((e.offset, true, "`".into()));
351 insertions.push((e.offset + e.length, false, "`".into()));
352 }
353 ME::Pre(e) => {
354 let lang = e.language.trim();
355 insertions.push((e.offset, true, format!("```{lang}\n")));
356 insertions.push((e.offset + e.length, false, "\n```".into()));
357 }
358 ME::TextUrl(e) => {
359 insertions.push((e.offset, true, "[".into()));
360 insertions.push((e.offset + e.length, false, format!("]({})", e.url)));
361 }
362 ME::MentionName(e) => {
363 insertions.push((e.offset, true, "[".into()));
364 insertions.push((
365 e.offset + e.length,
366 false,
367 format!("](tg://user?id={})", e.user_id),
368 ));
369 }
370 ME::CustomEmoji(e) => {
371 insertions.push((e.offset, true, "", e.document_id),
376 ));
377 }
378 _ => {}
380 }
381 }
382
383 insertions.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
385 a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
386 });
387
388 let mut result = String::with_capacity(
389 text.len() + insertions.iter().map(|(_, _, s)| s.len()).sum::<usize>(),
390 );
391 let mut ins_idx = 0;
392 let mut utf16_pos: i32 = 0;
393
394 for ch in text.chars() {
395 while ins_idx < insertions.len() && insertions[ins_idx].0 <= utf16_pos {
396 result.push_str(&insertions[ins_idx].2);
397 ins_idx += 1;
398 }
399 match ch {
401 '*' | '_' | '~' | '|' | '[' | ']' | '(' | ')' | '`' | '\\' | '!' => {
402 result.push('\\');
403 result.push(ch);
404 }
405 c => result.push(c),
406 }
407 utf16_pos += ch.len_utf16() as i32;
408 }
409 while ins_idx < insertions.len() {
410 result.push_str(&insertions[ins_idx].2);
411 ins_idx += 1;
412 }
413
414 result
415}
416
417#[cfg(not(feature = "html5ever"))]
425pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
426 let mut out = String::with_capacity(html.len());
427 let mut ents = Vec::new();
428 let mut stack: Vec<(HtmlTag, i32, Option<String>)> = Vec::new();
429 let mut utf16_off: i32 = 0;
430
431 let bytes = html.as_bytes();
432 let len = bytes.len();
433 let mut i = 0;
434
435 while i < len {
436 if bytes[i] == b'<' {
437 let tag_start = i + 1;
438 let mut j = tag_start;
439 while j < len && bytes[j] != b'>' {
440 j += 1;
441 }
442 let tag_content = &html[tag_start..j];
443 i = j + 1;
444
445 let is_close = tag_content.starts_with('/');
446 let tag_str = if is_close {
447 tag_content[1..].trim()
448 } else {
449 tag_content.trim()
450 };
451 let (tag_name, attrs) = parse_tag(tag_str);
452
453 if is_close {
454 if let Some(pos) = stack.iter().rposition(|(t, _, _)| t.name() == tag_name) {
455 let (htag, start_off, extra) = stack.remove(pos);
456 let length = utf16_off - start_off;
457 if length > 0 {
458 let entity = match htag {
459 HtmlTag::Bold => Some(tl::enums::MessageEntity::Bold(
460 tl::types::MessageEntityBold {
461 offset: start_off,
462 length,
463 },
464 )),
465 HtmlTag::Italic => Some(tl::enums::MessageEntity::Italic(
466 tl::types::MessageEntityItalic {
467 offset: start_off,
468 length,
469 },
470 )),
471 HtmlTag::Underline => Some(tl::enums::MessageEntity::Underline(
472 tl::types::MessageEntityUnderline {
473 offset: start_off,
474 length,
475 },
476 )),
477 HtmlTag::Strike => Some(tl::enums::MessageEntity::Strike(
478 tl::types::MessageEntityStrike {
479 offset: start_off,
480 length,
481 },
482 )),
483 HtmlTag::Spoiler => Some(tl::enums::MessageEntity::Spoiler(
484 tl::types::MessageEntitySpoiler {
485 offset: start_off,
486 length,
487 },
488 )),
489 HtmlTag::Code => Some(tl::enums::MessageEntity::Code(
490 tl::types::MessageEntityCode {
491 offset: start_off,
492 length,
493 },
494 )),
495 HtmlTag::Pre => {
496 Some(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
497 offset: start_off,
498 length,
499 language: extra.unwrap_or_default(),
500 }))
501 }
502 HtmlTag::Link(url) => {
503 const PFX: &str = "tg://user?id=";
504 if let Some(stripped) = url.strip_prefix(PFX) {
505 stripped.parse::<i64>().ok().map(|uid| {
506 tl::enums::MessageEntity::MentionName(
507 tl::types::MessageEntityMentionName {
508 offset: start_off,
509 length,
510 user_id: uid,
511 },
512 )
513 })
514 } else {
515 Some(tl::enums::MessageEntity::TextUrl(
516 tl::types::MessageEntityTextUrl {
517 offset: start_off,
518 length,
519 url,
520 },
521 ))
522 }
523 }
524 HtmlTag::CustomEmoji(id) => {
525 Some(tl::enums::MessageEntity::CustomEmoji(
526 tl::types::MessageEntityCustomEmoji {
527 offset: start_off,
528 length,
529 document_id: id,
530 },
531 ))
532 }
533 HtmlTag::Unknown => None,
534 };
535 if let Some(e) = entity {
536 ents.push(e);
537 }
538 }
539 }
540 } else {
541 let htag = match tag_name {
542 "b" | "strong" => HtmlTag::Bold,
543 "i" | "em" => HtmlTag::Italic,
544 "u" => HtmlTag::Underline,
545 "s" | "del" | "strike" => HtmlTag::Strike,
546 "tg-spoiler" => HtmlTag::Spoiler,
547 "code" => HtmlTag::Code,
548 "pre" => HtmlTag::Pre,
549 "a" => HtmlTag::Link(
550 attrs
551 .iter()
552 .find(|(k, _)| k == "href")
553 .map(|(_, v)| v.clone())
554 .unwrap_or_default(),
555 ),
556 "tg-emoji" => HtmlTag::CustomEmoji(
557 attrs
558 .iter()
559 .find(|(k, _)| k == "emoji-id")
560 .and_then(|(_, v)| v.parse::<i64>().ok())
561 .unwrap_or(0),
562 ),
563 "br" => {
564 out.push('\n');
565 utf16_off += 1;
566 continue;
567 }
568 _ => HtmlTag::Unknown,
569 };
570 stack.push((htag, utf16_off, None));
571 }
572 } else {
573 let text_start = i;
574 while i < len && bytes[i] != b'<' {
575 i += 1;
576 }
577 let decoded = decode_html_entities(&html[text_start..i]);
578 for ch in decoded.chars() {
579 out.push(ch);
580 utf16_off += ch.len_utf16() as i32;
581 }
582 }
583 }
584
585 (out, ents)
586}
587
/// Decodes HTML character references in `s` in a single left-to-right pass.
///
/// Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` and numeric
/// references (`&#39;`, `&#x27;`). Each reference is decoded exactly once, so
/// `&amp;lt;` yields the text `&lt;` rather than `<`. Anything that is not a
/// recognised reference is copied through unchanged.
#[cfg(not(feature = "html5ever"))]
fn decode_html_entities(s: &str) -> String {
    /// Resolves the part between `&` and `;` to a character.
    fn resolve(name: &str) -> Option<char> {
        match name {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            "nbsp" => Some('\u{00A0}'),
            _ => {
                let digits = name.strip_prefix('#')?;
                let (digits, radix) = match digits.strip_prefix(|c| c == 'x' || c == 'X') {
                    Some(hex) => (hex, 16),
                    None => (digits, 10),
                };
                // Reject signs and other junk that from_str_radix would accept.
                if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_alphanumeric()) {
                    return None;
                }
                u32::from_str_radix(digits, radix)
                    .ok()
                    .and_then(char::from_u32)
            }
        }
    }

    // Longest reference body we look for; bounds the scan after each '&'.
    const MAX_NAME_LEN: usize = 10;

    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let tail = &rest[amp..];
        let decoded = tail
            .bytes()
            .skip(1)
            .take(MAX_NAME_LEN + 1)
            .position(|b| b == b';')
            .and_then(|semi| resolve(&tail[1..1 + semi]).map(|ch| (ch, semi + 2)));
        match decoded {
            Some((ch, consumed)) => {
                out.push(ch);
                rest = &tail[consumed..];
            }
            None => {
                out.push('&');
                rest = &tail[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
597
598#[cfg(not(feature = "html5ever"))]
599fn parse_tag(s: &str) -> (&str, Vec<(String, String)>) {
600 let mut parts = s.splitn(2, char::is_whitespace);
601 let name = parts.next().unwrap_or("").trim_end_matches('/');
602 let attrs = parse_attrs(parts.next().unwrap_or(""));
603 (name, attrs)
604}
605
/// Parses `key=value` attribute pairs from the attribute part of a tag.
///
/// Values may be double-quoted, single-quoted or bare (ending at the next
/// whitespace). A quoted value with no closing quote runs to the end of the
/// input. Attributes without a value are skipped: the key is the last
/// whitespace-separated token before `=`. Parsing stops at the first point
/// where no further `=` is found.
#[cfg(not(feature = "html5ever"))]
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut result = Vec::new();
    let mut rem = s.trim();
    while let Some(eq) = rem.find('=') {
        let key = rem[..eq]
            .split_whitespace()
            .last()
            .unwrap_or("")
            .to_string();
        let after = rem[eq + 1..].trim_start();
        let quote = after.chars().next().filter(|&c| matches!(c, '"' | '\''));
        let (val, rest) = match quote {
            Some(q) => {
                // Both quote characters are one byte long.
                let body = &after[1..];
                match body.find(q) {
                    Some(end) => (&body[..end], &body[end + 1..]),
                    None => (body, ""),
                }
            }
            None => {
                let end = after.find(char::is_whitespace).unwrap_or(after.len());
                (&after[..end], &after[end..])
            }
        };
        result.push((key, val.to_string()));
        rem = rest.trim_start();
    }
    result
}
632
/// An opening tag seen by the built-in `parse_html` scanner, kept on its stack
/// until the matching closing tag arrives.
#[cfg(not(feature = "html5ever"))]
// NOTE(review): the reason for this allow is not visible here — confirm it is
// still needed.
#[allow(dead_code)]
#[derive(Debug, Clone)]
enum HtmlTag {
    /// `<b>` / `<strong>`
    Bold,
    /// `<i>` / `<em>`
    Italic,
    /// `<u>`
    Underline,
    /// `<s>` / `<del>` / `<strike>`
    Strike,
    /// `<tg-spoiler>`
    Spoiler,
    /// `<code>`
    Code,
    /// `<pre>`
    Pre,
    /// `<a>`; carries the `href` attribute value (empty when absent).
    Link(String),
    /// `<tg-emoji>`; carries the parsed `emoji-id` attribute (0 when absent
    /// or not a number).
    CustomEmoji(i64),
    /// Any other tag.
    Unknown,
}
648
649#[cfg(not(feature = "html5ever"))]
650impl HtmlTag {
651 fn name(&self) -> &str {
652 match self {
653 Self::Bold => "b",
654 Self::Italic => "i",
655 Self::Underline => "u",
656 Self::Strike => "s",
657 Self::Spoiler => "tg-spoiler",
658 Self::Code => "code",
659 Self::Pre => "pre",
660 Self::Link(_) => "a",
661 Self::CustomEmoji(_) => "tg-emoji",
662 Self::Unknown => "",
663 }
664 }
665}
666
/// Parses Telegram-style HTML into plain text plus message entities using the
/// `html5ever` tokenizer.
///
/// Start tags push an entity with length 0 at the current offset; end tags
/// fill in the length of the most recently pushed entity of the matching kind
/// and drop it if it turned out empty. Character tokens are appended to the
/// text and advance the offset by their UTF-16 length. `<br>` appends a
/// newline. Entities whose end tag never arrives stay in the result with
/// length 0.
#[cfg(feature = "html5ever")]
#[cfg_attr(docsrs, doc(cfg(feature = "html5ever")))]
pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    use html5ever::tendril::StrTendril;
    use html5ever::tokenizer::{
        BufferQueue, Tag, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer,
    };
    use std::cell::Cell;

    // Accumulated parser state. `process_token` takes `&self`, so each field
    // sits in a `Cell` and is taken out / put back around every token.
    struct Sink {
        text: Cell<String>,
        entities: Cell<Vec<tl::enums::MessageEntity>>,
        offset: Cell<i32>,
    }

    impl TokenSink for Sink {
        type Handle = ();

        fn process_token(&self, token: Token, _line: u64) -> TokenSinkResult<()> {
            // Move the state out of the cells; it is written back at the end.
            let mut text = self.text.take();
            let mut entities = self.entities.take();
            let mut offset = self.offset.get();

            // Sets the length of the last entity of `$kind` to reach the
            // current offset and removes it when that length is 0.
            // NOTE(review): the lookup does not distinguish open entities from
            // already-closed ones, so a stray extra end tag re-extends the
            // previous entity of that kind.
            macro_rules! close_ent {
                ($kind:ident) => {{
                    if let Some(idx) = entities
                        .iter()
                        .rposition(|e| matches!(e, tl::enums::MessageEntity::$kind(_)))
                    {
                        let closed_len = {
                            if let tl::enums::MessageEntity::$kind(ref mut inner) = entities[idx] {
                                inner.length = offset - inner.offset;
                                inner.length
                            } else {
                                unreachable!()
                            }
                        };
                        if closed_len == 0 {
                            entities.remove(idx);
                        }
                    }
                }};
            }

            match token {
                // Start tag: open a new entity at the current offset.
                Token::TagToken(Tag {
                    kind: TagKind::StartTag,
                    name,
                    attrs,
                    ..
                }) => {
                    // Placeholder length until the end tag is seen.
                    let len0 = 0i32;
                    match name.as_ref() {
                        "b" | "strong" => entities.push(tl::enums::MessageEntity::Bold(
                            tl::types::MessageEntityBold {
                                offset,
                                length: len0,
                            },
                        )),
                        "i" | "em" => entities.push(tl::enums::MessageEntity::Italic(
                            tl::types::MessageEntityItalic {
                                offset,
                                length: len0,
                            },
                        )),
                        "u" => entities.push(tl::enums::MessageEntity::Underline(
                            tl::types::MessageEntityUnderline {
                                offset,
                                length: len0,
                            },
                        )),
                        "s" | "del" | "strike" => entities.push(tl::enums::MessageEntity::Strike(
                            tl::types::MessageEntityStrike {
                                offset,
                                length: len0,
                            },
                        )),
                        "tg-spoiler" => entities.push(tl::enums::MessageEntity::Spoiler(
                            tl::types::MessageEntitySpoiler {
                                offset,
                                length: len0,
                            },
                        )),
                        "code" => {
                            // A `code` tag whose preceding entity is a `pre`
                            // with length 0 only supplies that pre's language.
                            let in_pre = entities.last().map_or(
                                false,
                                |e| matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0),
                            );
                            if in_pre {
                                let lang = attrs
                                    .iter()
                                    .find(|a| a.name.local.as_ref() == "class")
                                    .and_then(|a| {
                                        let v: &str = a.value.as_ref();
                                        v.strip_prefix("language-")
                                    })
                                    .map(|s| s.to_string())
                                    .unwrap_or_default();
                                if let Some(tl::enums::MessageEntity::Pre(ref mut p)) =
                                    entities.last_mut()
                                {
                                    p.language = lang;
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::Code(
                                    tl::types::MessageEntityCode {
                                        offset,
                                        length: len0,
                                    },
                                ));
                            }
                        }
                        "pre" => entities.push(tl::enums::MessageEntity::Pre(
                            tl::types::MessageEntityPre {
                                offset,
                                length: len0,
                                language: String::new(),
                            },
                        )),
                        "a" => {
                            let href = attrs
                                .iter()
                                .find(|a| a.name.local.as_ref() == "href")
                                .map(|a| {
                                    let v: &str = a.value.as_ref();
                                    v.to_string()
                                })
                                .unwrap_or_default();
                            const MENTION_PFX: &str = "tg://user?id=";
                            // Mention links whose id does not parse produce no
                            // entity at all.
                            if href.starts_with(MENTION_PFX) {
                                if let Ok(uid) = href[MENTION_PFX.len()..].parse::<i64>() {
                                    entities.push(tl::enums::MessageEntity::MentionName(
                                        tl::types::MessageEntityMentionName {
                                            offset,
                                            length: len0,
                                            user_id: uid,
                                        },
                                    ));
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::TextUrl(
                                    tl::types::MessageEntityTextUrl {
                                        offset,
                                        length: len0,
                                        url: href,
                                    },
                                ));
                            }
                        }
                        "tg-emoji" => {
                            // Missing or non-numeric ids fall back to 0.
                            let doc_id = attrs
                                .iter()
                                .find(|a| a.name.local.as_ref() == "emoji-id")
                                .and_then(|a| {
                                    let v: &str = a.value.as_ref();
                                    v.parse::<i64>().ok()
                                })
                                .unwrap_or(0);
                            entities.push(tl::enums::MessageEntity::CustomEmoji(
                                tl::types::MessageEntityCustomEmoji {
                                    offset,
                                    length: len0,
                                    document_id: doc_id,
                                },
                            ));
                        }
                        "br" => {
                            text.push('\n');
                            offset += 1;
                        }
                        _ => {}
                    }
                }

                // End tag: finalise the matching entity.
                Token::TagToken(Tag {
                    kind: TagKind::EndTag,
                    name,
                    ..
                }) => {
                    match name.as_ref() {
                        "b" | "strong" => close_ent!(Bold),
                        "i" | "em" => close_ent!(Italic),
                        "u" => close_ent!(Underline),
                        "s" | "del" | "strike" => close_ent!(Strike),
                        "tg-spoiler" => close_ent!(Spoiler),
                        "code" => {
                            // Mirrors the start-tag check: a `code` directly
                            // after a still-empty `pre` has no entity to close.
                            let in_pre = entities.last().map_or(
                                false,
                                |e| matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0),
                            );
                            if !in_pre {
                                close_ent!(Code);
                            }
                        }
                        "pre" => close_ent!(Pre),
                        // NOTE(review): the link kind is chosen from the last
                        // entity only, so a mention link that contains another
                        // entity is closed as TextUrl — confirm this is
                        // intended.
                        "a" => match entities.last() {
                            Some(tl::enums::MessageEntity::MentionName(_)) => {
                                close_ent!(MentionName)
                            }
                            _ => close_ent!(TextUrl),
                        },
                        "tg-emoji" => close_ent!(CustomEmoji),
                        _ => {}
                    }
                }

                // Text: append and advance the offset in UTF-16 units.
                Token::CharacterTokens(s) => {
                    let s_str: &str = s.as_ref();
                    offset += s_str.encode_utf16().count() as i32;
                    text.push_str(s_str);
                }

                _ => {}
            }

            // Store the updated state back for the next token.
            self.text.replace(text);
            self.entities.replace(entities);
            self.offset.replace(offset);
            TokenSinkResult::Continue
        }
    }

    let mut input = BufferQueue::default();
    input.push_back(StrTendril::from_slice(html).try_reinterpret().unwrap());

    let tok = Tokenizer::new(
        Sink {
            text: Cell::new(String::with_capacity(html.len())),
            entities: Cell::new(Vec::new()),
            offset: Cell::new(0),
        },
        Default::default(),
    );
    let _ = tok.feed(&mut input);
    tok.end();

    let Sink { text, entities, .. } = tok.sink;
    (text.take(), entities.take())
}
920
921pub fn generate_html(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
925 use tl::enums::MessageEntity as ME;
926
927 let mut markers: Vec<(i32, bool, String)> = Vec::new();
928
929 for ent in entities {
930 let (off, len, open, close) = match ent {
931 ME::Bold(e) => (e.offset, e.length, "<b>".into(), "</b>".into()),
932 ME::Italic(e) => (e.offset, e.length, "<i>".into(), "</i>".into()),
933 ME::Underline(e) => (e.offset, e.length, "<u>".into(), "</u>".into()),
934 ME::Strike(e) => (e.offset, e.length, "<s>".into(), "</s>".into()),
935 ME::Spoiler(e) => (
936 e.offset,
937 e.length,
938 "<tg-spoiler>".into(),
939 "</tg-spoiler>".into(),
940 ),
941 ME::Code(e) => (e.offset, e.length, "<code>".into(), "</code>".into()),
942 ME::Pre(e) => {
943 let lang = if e.language.is_empty() {
944 String::new()
945 } else {
946 format!(" class=\"language-{}\"", e.language)
947 };
948 (
949 e.offset,
950 e.length,
951 format!("<pre><code{lang}>"),
952 "</code></pre>".into(),
953 )
954 }
955 ME::TextUrl(e) => (
956 e.offset,
957 e.length,
958 format!("<a href=\"{}\">", escape_html(&e.url)),
959 "</a>".into(),
960 ),
961 ME::MentionName(e) => (
962 e.offset,
963 e.length,
964 format!("<a href=\"tg://user?id={}\">", e.user_id),
965 "</a>".into(),
966 ),
967 ME::CustomEmoji(e) => (
968 e.offset,
969 e.length,
970 format!("<tg-emoji emoji-id=\"{}\">", e.document_id),
971 "</tg-emoji>".into(),
972 ),
973 _ => continue,
974 };
975 markers.push((off, true, open));
976 markers.push((off + len, false, close));
977 }
978
979 markers.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
980 a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
981 });
982
983 let mut result =
984 String::with_capacity(text.len() + markers.iter().map(|(_, _, s)| s.len()).sum::<usize>());
985 let mut marker_idx = 0;
986 let mut utf16_pos: i32 = 0;
987
988 for ch in text.chars() {
989 while marker_idx < markers.len() && markers[marker_idx].0 <= utf16_pos {
990 result.push_str(&markers[marker_idx].2);
991 marker_idx += 1;
992 }
993 match ch {
994 '&' => result.push_str("&"),
995 '<' => result.push_str("<"),
996 '>' => result.push_str(">"),
997 '"' => result.push_str("""),
998 c => result.push(c),
999 }
1000 utf16_pos += ch.len_utf16() as i32;
1001 }
1002 while marker_idx < markers.len() {
1003 result.push_str(&markers[marker_idx].2);
1004 marker_idx += 1;
1005 }
1006
1007 result
1008}
1009
/// Escapes `&`, `<`, `>` and `"` so that `s` can be placed in HTML text or
/// inside a double-quoted attribute value.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
1016
/// Unit tests for the markdown and HTML parsers and generators.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn markdown_bold() {
        let (text, ents) = parse_markdown("Hello **world**!");
        assert_eq!(text, "Hello world!");
        assert_eq!(ents.len(), 1);
        if let tl::enums::MessageEntity::Bold(b) = &ents[0] {
            assert_eq!(b.offset, 6);
            assert_eq!(b.length, 5);
        } else {
            panic!("expected bold");
        }
    }

    #[test]
    fn markdown_bold_single_asterisk() {
        let (text, ents) = parse_markdown("*bold*");
        assert_eq!(text, "bold");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Bold(_)));
    }

    #[test]
    fn markdown_italic_double_underscore() {
        let (text, ents) = parse_markdown("__italic__");
        assert_eq!(text, "italic");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Italic(_)));
    }

    #[test]
    fn markdown_italic_single_underscore() {
        let (text, ents) = parse_markdown("_italic_");
        assert_eq!(text, "italic");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Italic(_)));
    }

    #[test]
    fn markdown_inline_code() {
        let (text, ents) = parse_markdown("Use `foo()` to do it");
        assert_eq!(text, "Use foo() to do it");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Code(_)));
    }

    #[test]
    fn markdown_code_block_with_lang() {
        let (text, ents) = parse_markdown("```rust\nfn main() {}\n```");
        assert_eq!(text, "fn main() {}");
        if let tl::enums::MessageEntity::Pre(p) = &ents[0] {
            assert_eq!(p.language, "rust");
            assert_eq!(p.offset, 0);
        } else {
            panic!("expected pre");
        }
    }

    #[test]
    fn markdown_code_block_no_lang() {
        let (text, ents) = parse_markdown("```\nhello\n```");
        assert_eq!(text, "hello");
        if let tl::enums::MessageEntity::Pre(p) = &ents[0] {
            assert_eq!(p.language, "");
        } else {
            panic!("expected pre");
        }
    }

    #[test]
    fn markdown_strike() {
        let (text, ents) = parse_markdown("~~strike~~");
        assert_eq!(text, "strike");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Strike(_)));
    }

    #[test]
    fn markdown_spoiler() {
        let (text, ents) = parse_markdown("||spoiler||");
        assert_eq!(text, "spoiler");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Spoiler(_)));
    }

    #[test]
    fn markdown_text_url() {
        let (text, ents) = parse_markdown("[click](https://example.com)");
        assert_eq!(text, "click");
        if let tl::enums::MessageEntity::TextUrl(e) = &ents[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("expected text url");
        }
    }

    #[test]
    fn markdown_mention() {
        let (text, ents) = parse_markdown("[User](tg://user?id=42)");
        assert_eq!(text, "User");
        if let tl::enums::MessageEntity::MentionName(e) = &ents[0] {
            assert_eq!(e.user_id, 42);
        } else {
            panic!("expected mention name");
        }
    }

    #[test]
    fn markdown_custom_emoji() {
        let (text, ents) = parse_markdown("![👍](tg://emoji?id=5368324170671202286)");
        assert_eq!(text, "👍");
        if let tl::enums::MessageEntity::CustomEmoji(e) = &ents[0] {
            assert_eq!(e.document_id, 5368324170671202286);
        } else {
            panic!("expected custom emoji");
        }
    }

    #[test]
    fn markdown_backslash_escape() {
        let (text, ents) = parse_markdown(r"\*not bold\*");
        assert_eq!(text, "*not bold*");
        assert!(ents.is_empty());
    }

    #[test]
    fn markdown_nested() {
        let (text, ents) = parse_markdown("**bold __italic__ end**");
        assert_eq!(text, "bold italic end");
        assert_eq!(ents.len(), 2);
        assert!(
            ents.iter()
                .any(|e| matches!(e, tl::enums::MessageEntity::Bold(_)))
        );
        assert!(
            ents.iter()
                .any(|e| matches!(e, tl::enums::MessageEntity::Italic(_)))
        );
    }

    #[test]
    fn generate_markdown_pre() {
        let entities = vec![tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
            offset: 0,
            length: 12,
            language: "rust".into(),
        })];
        let md = generate_markdown("fn main() {}", &entities);
        assert_eq!(md, "```rust\nfn main() {}\n```");
    }

    #[test]
    fn generate_markdown_text_url() {
        let entities = vec![tl::enums::MessageEntity::TextUrl(
            tl::types::MessageEntityTextUrl {
                offset: 0,
                length: 5,
                url: "https://example.com".into(),
            },
        )];
        let md = generate_markdown("click", &entities);
        assert_eq!(md, "[click](https://example.com)");
    }

    #[test]
    fn generate_markdown_mention() {
        let entities = vec![tl::enums::MessageEntity::MentionName(
            tl::types::MessageEntityMentionName {
                offset: 0,
                length: 4,
                user_id: 99,
            },
        )];
        let md = generate_markdown("User", &entities);
        assert_eq!(md, "[User](tg://user?id=99)");
    }

    #[test]
    fn generate_markdown_custom_emoji() {
        let entities = vec![tl::enums::MessageEntity::CustomEmoji(
            tl::types::MessageEntityCustomEmoji {
                offset: 0,
                length: 2,
                document_id: 123456,
            },
        )];
        let md = generate_markdown("👍", &entities);
        assert_eq!(md, "![👍](tg://emoji?id=123456)");
    }

    #[test]
    fn generate_markdown_escapes_special_chars() {
        let md = generate_markdown("1 * 2 = 2", &[]);
        assert_eq!(md, r"1 \* 2 = 2");
    }

    #[test]
    fn markdown_roundtrip_url() {
        let original = "click";
        let entities = vec![tl::enums::MessageEntity::TextUrl(
            tl::types::MessageEntityTextUrl {
                offset: 0,
                length: 5,
                url: "https://example.com".into(),
            },
        )];
        let md = generate_markdown(original, &entities);
        let (back, ents2) = parse_markdown(&md);
        assert_eq!(back, original);
        if let tl::enums::MessageEntity::TextUrl(e) = &ents2[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("roundtrip url failed");
        }
    }

    #[test]
    fn html_bold_italic() {
        let (text, ents) = parse_html("<b>bold</b> and <i>italic</i>");
        assert_eq!(text, "bold and italic");
        assert_eq!(ents.len(), 2);
    }

    #[test]
    fn html_link() {
        let (text, ents) = parse_html("<a href=\"https://example.com\">click</a>");
        assert_eq!(text, "click");
        if let tl::enums::MessageEntity::TextUrl(e) = &ents[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("expected text url");
        }
    }

    // Exercises the built-in entity decoder, which only exists without the
    // `html5ever` feature.
    #[cfg(not(feature = "html5ever"))]
    #[test]
    fn html_entities_decoded() {
        let (text, _) = parse_html("A &amp; B &lt;3&gt;");
        assert_eq!(text, "A & B <3>");
    }

    #[test]
    fn generate_html_roundtrip() {
        let original = "Hello world";
        let entities = vec![tl::enums::MessageEntity::Bold(
            tl::types::MessageEntityBold {
                offset: 0,
                length: 5,
            },
        )];
        let html = generate_html(original, &entities);
        assert_eq!(html, "<b>Hello</b> world");
        let (back, ents2) = parse_html(&html);
        assert_eq!(back, original);
        assert_eq!(ents2.len(), 1);
    }
}