1use layer_tl_types as tl;
34
35pub fn parse_markdown(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
39 let mut out = String::with_capacity(text.len());
40 let mut ents = Vec::new();
41 let chars: Vec<char> = text.chars().collect();
42 let n = chars.len();
43 let mut i = 0;
44 let mut open_stack: Vec<(MarkdownTag, i32)> = Vec::new();
45 let mut utf16_off: i32 = 0;
46
47 macro_rules! push_char {
48 ($c:expr) => {{
49 let c: char = $c;
50 out.push(c);
51 utf16_off += c.len_utf16() as i32;
52 }};
53 }
54
55 while i < n {
56 if chars[i] == '\\' && i + 1 < n {
58 let next = chars[i + 1];
59 if matches!(
60 next,
61 '*' | '_' | '~' | '|' | '[' | ']' | '(' | ')' | '`' | '\\' | '!'
62 ) {
63 push_char!(next);
64 i += 2;
65 continue;
66 }
67 }
68
69 if i + 2 < n && chars[i] == '`' && chars[i + 1] == '`' && chars[i + 2] == '`' {
71 let start = i + 3;
72 let mut j = start;
73 while j + 2 < n {
74 if chars[j] == '`' && chars[j + 1] == '`' && chars[j + 2] == '`' {
75 break;
76 }
77 j += 1;
78 }
79 if j + 2 < n {
80 let block: String = chars[start..j].iter().collect();
81 let (lang, code) = if let Some(nl) = block.find('\n') {
82 (block[..nl].trim().to_string(), block[nl + 1..].to_string())
83 } else {
84 (String::new(), block)
85 };
86 let code_off = utf16_off;
87 let code_utf16: i32 = code.encode_utf16().count() as i32;
88 ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
89 offset: code_off,
90 length: code_utf16,
91 language: lang,
92 }));
93 for c in code.chars() {
94 push_char!(c);
95 }
96 i = j + 3;
97 continue;
98 }
99 }
100
101 if chars[i] == '`' {
103 let start = i + 1;
104 let mut j = start;
105 while j < n && chars[j] != '`' {
106 j += 1;
107 }
108 if j < n {
109 let code: String = chars[start..j].iter().collect();
110 let code_off = utf16_off;
111 let code_utf16: i32 = code.encode_utf16().count() as i32;
112 ents.push(tl::enums::MessageEntity::Code(
113 tl::types::MessageEntityCode {
114 offset: code_off,
115 length: code_utf16,
116 },
117 ));
118 for c in code.chars() {
119 push_char!(c);
120 }
121 i = j + 1;
122 continue;
123 }
124 }
125
126 if chars[i] == '!' && i + 1 < n && chars[i + 1] == '[' {
128 let text_start = i + 2;
129 let mut j = text_start;
130 while j < n && chars[j] != ']' {
131 j += 1;
132 }
133 if j < n && j + 1 < n && chars[j + 1] == '(' {
134 let link_start = j + 2;
135 let mut k = link_start;
136 while k < n && chars[k] != ')' {
137 k += 1;
138 }
139 if k < n {
140 let inner_text: String = chars[text_start..j].iter().collect();
141 let url: String = chars[link_start..k].iter().collect();
142 const EMOJI_PFX: &str = "tg://emoji?id=";
143 if let Some(stripped) = url.strip_prefix(EMOJI_PFX) {
144 if let Ok(doc_id) = stripped.parse::<i64>() {
145 let ent_off = utf16_off;
146 for c in inner_text.chars() {
147 push_char!(c);
148 }
149 ents.push(tl::enums::MessageEntity::CustomEmoji(
150 tl::types::MessageEntityCustomEmoji {
151 offset: ent_off,
152 length: utf16_off - ent_off,
153 document_id: doc_id,
154 },
155 ));
156 i = k + 1;
157 continue;
158 }
159 }
160 }
161 }
162 }
163
164 if chars[i] == '[' {
166 let text_start = i + 1;
167 let mut j = text_start;
168 let mut depth = 1i32;
169 while j < n {
170 if chars[j] == '[' {
171 depth += 1;
172 }
173 if chars[j] == ']' {
174 depth -= 1;
175 if depth == 0 {
176 break;
177 }
178 }
179 j += 1;
180 }
181 if j < n && j + 1 < n && chars[j + 1] == '(' {
182 let link_start = j + 2;
183 let mut k = link_start;
184 while k < n && chars[k] != ')' {
185 k += 1;
186 }
187 if k < n {
188 let inner_text: String = chars[text_start..j].iter().collect();
189 let url: String = chars[link_start..k].iter().collect();
190 const MENTION_PFX: &str = "tg://user?id=";
191 let ent_off = utf16_off;
192 for c in inner_text.chars() {
193 push_char!(c);
194 }
195 let ent_len = utf16_off - ent_off;
196 if let Some(stripped) = url.strip_prefix(MENTION_PFX) {
197 if let Ok(uid) = stripped.parse::<i64>() {
198 ents.push(tl::enums::MessageEntity::MentionName(
199 tl::types::MessageEntityMentionName {
200 offset: ent_off,
201 length: ent_len,
202 user_id: uid,
203 },
204 ));
205 }
206 } else {
207 ents.push(tl::enums::MessageEntity::TextUrl(
208 tl::types::MessageEntityTextUrl {
209 offset: ent_off,
210 length: ent_len,
211 url,
212 },
213 ));
214 }
215 i = k + 1;
216 continue;
217 }
218 }
219 }
220
221 let two: Option<MarkdownTag> = if i + 1 < n {
223 match [chars[i], chars[i + 1]] {
224 ['*', '*'] => Some(MarkdownTag::Bold),
225 ['_', '_'] => Some(MarkdownTag::Italic),
226 ['~', '~'] => Some(MarkdownTag::Strike),
227 ['|', '|'] => Some(MarkdownTag::Spoiler),
228 _ => None,
229 }
230 } else {
231 None
232 };
233
234 if let Some(tag) = two {
235 if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
236 let (_, start_off) = open_stack.remove(pos);
237 let length = utf16_off - start_off;
238 if length > 0 {
239 ents.push(make_entity(tag, start_off, length));
240 }
241 } else {
242 open_stack.push((tag, utf16_off));
243 }
244 i += 2;
245 continue;
246 }
247
248 let one: Option<MarkdownTag> = match chars[i] {
251 '*' => Some(MarkdownTag::Bold),
252 '_' => Some(MarkdownTag::Italic),
253 _ => None,
254 };
255
256 if let Some(tag) = one {
257 if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
258 let (_, start_off) = open_stack.remove(pos);
259 let length = utf16_off - start_off;
260 if length > 0 {
261 ents.push(make_entity(tag, start_off, length));
262 }
263 } else {
264 open_stack.push((tag, utf16_off));
265 }
266 i += 1;
267 continue;
268 }
269
270 push_char!(chars[i]);
271 i += 1;
272 }
273
274 (out, ents)
275}
276
277fn make_entity(tag: MarkdownTag, offset: i32, length: i32) -> tl::enums::MessageEntity {
278 match tag {
279 MarkdownTag::Bold => {
280 tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset, length })
281 }
282 MarkdownTag::Italic => {
283 tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset, length })
284 }
285 MarkdownTag::Strike => {
286 tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset, length })
287 }
288 MarkdownTag::Spoiler => {
289 tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset, length })
290 }
291 }
292}
293
/// Inline styles that `parse_markdown` opens and closes with paired markers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkdownTag {
    /// `**…**` or `*…*`.
    Bold,
    /// `__…__` or `_…_`.
    Italic,
    /// `~~…~~`.
    Strike,
    /// `||…||`.
    Spoiler,
}
301
302pub fn generate_markdown(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
307 use tl::enums::MessageEntity as ME;
308
309 let mut insertions: Vec<(i32, bool, String)> = Vec::new();
312
313 for ent in entities {
314 match ent {
315 ME::Bold(e) => {
316 insertions.push((e.offset, true, "**".into()));
317 insertions.push((e.offset + e.length, false, "**".into()));
318 }
319 ME::Italic(e) => {
320 insertions.push((e.offset, true, "__".into()));
321 insertions.push((e.offset + e.length, false, "__".into()));
322 }
323 ME::Strike(e) => {
324 insertions.push((e.offset, true, "~~".into()));
325 insertions.push((e.offset + e.length, false, "~~".into()));
326 }
327 ME::Spoiler(e) => {
328 insertions.push((e.offset, true, "||".into()));
329 insertions.push((e.offset + e.length, false, "||".into()));
330 }
331 ME::Code(e) => {
332 insertions.push((e.offset, true, "`".into()));
333 insertions.push((e.offset + e.length, false, "`".into()));
334 }
335 ME::Pre(e) => {
336 let lang = e.language.trim();
337 insertions.push((e.offset, true, format!("```{lang}\n")));
338 insertions.push((e.offset + e.length, false, "\n```".into()));
339 }
340 ME::TextUrl(e) => {
341 insertions.push((e.offset, true, "[".into()));
342 insertions.push((e.offset + e.length, false, format!("]({})", e.url)));
343 }
344 ME::MentionName(e) => {
345 insertions.push((e.offset, true, "[".into()));
346 insertions.push((
347 e.offset + e.length,
348 false,
349 format!("](tg://user?id={})", e.user_id),
350 ));
351 }
352 ME::CustomEmoji(e) => {
353 insertions.push((e.offset, true, "", e.document_id),
358 ));
359 }
360 _ => {}
362 }
363 }
364
365 insertions.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
367 a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
368 });
369
370 let mut result = String::with_capacity(
371 text.len() + insertions.iter().map(|(_, _, s)| s.len()).sum::<usize>(),
372 );
373 let mut ins_idx = 0;
374 let mut utf16_pos: i32 = 0;
375
376 for ch in text.chars() {
377 while ins_idx < insertions.len() && insertions[ins_idx].0 <= utf16_pos {
378 result.push_str(&insertions[ins_idx].2);
379 ins_idx += 1;
380 }
381 match ch {
383 '*' | '_' | '~' | '|' | '[' | ']' | '(' | ')' | '`' | '\\' | '!' => {
384 result.push('\\');
385 result.push(ch);
386 }
387 c => result.push(c),
388 }
389 utf16_pos += ch.len_utf16() as i32;
390 }
391 while ins_idx < insertions.len() {
392 result.push_str(&insertions[ins_idx].2);
393 ins_idx += 1;
394 }
395
396 result
397}
398
399#[cfg(not(feature = "html5ever"))]
407pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
408 let mut out = String::with_capacity(html.len());
409 let mut ents = Vec::new();
410 let mut stack: Vec<(HtmlTag, i32, Option<String>)> = Vec::new();
411 let mut utf16_off: i32 = 0;
412
413 let bytes = html.as_bytes();
414 let len = bytes.len();
415 let mut i = 0;
416
417 while i < len {
418 if bytes[i] == b'<' {
419 let tag_start = i + 1;
420 let mut j = tag_start;
421 while j < len && bytes[j] != b'>' {
422 j += 1;
423 }
424 let tag_content = &html[tag_start..j];
425 i = j + 1;
426
427 let is_close = tag_content.starts_with('/');
428 let tag_str = if is_close {
429 tag_content[1..].trim()
430 } else {
431 tag_content.trim()
432 };
433 let (tag_name, attrs) = parse_tag(tag_str);
434
435 if is_close {
436 if let Some(pos) = stack.iter().rposition(|(t, _, _)| t.name() == tag_name) {
437 let (htag, start_off, extra) = stack.remove(pos);
438 let length = utf16_off - start_off;
439 if length > 0 {
440 let entity = match htag {
441 HtmlTag::Bold => Some(tl::enums::MessageEntity::Bold(
442 tl::types::MessageEntityBold {
443 offset: start_off,
444 length,
445 },
446 )),
447 HtmlTag::Italic => Some(tl::enums::MessageEntity::Italic(
448 tl::types::MessageEntityItalic {
449 offset: start_off,
450 length,
451 },
452 )),
453 HtmlTag::Underline => Some(tl::enums::MessageEntity::Underline(
454 tl::types::MessageEntityUnderline {
455 offset: start_off,
456 length,
457 },
458 )),
459 HtmlTag::Strike => Some(tl::enums::MessageEntity::Strike(
460 tl::types::MessageEntityStrike {
461 offset: start_off,
462 length,
463 },
464 )),
465 HtmlTag::Spoiler => Some(tl::enums::MessageEntity::Spoiler(
466 tl::types::MessageEntitySpoiler {
467 offset: start_off,
468 length,
469 },
470 )),
471 HtmlTag::Code => Some(tl::enums::MessageEntity::Code(
472 tl::types::MessageEntityCode {
473 offset: start_off,
474 length,
475 },
476 )),
477 HtmlTag::Pre => {
478 Some(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
479 offset: start_off,
480 length,
481 language: extra.unwrap_or_default(),
482 }))
483 }
484 HtmlTag::Link(url) => {
485 const PFX: &str = "tg://user?id=";
486 if let Some(stripped) = url.strip_prefix(PFX) {
487 stripped.parse::<i64>().ok().map(|uid| {
488 tl::enums::MessageEntity::MentionName(
489 tl::types::MessageEntityMentionName {
490 offset: start_off,
491 length,
492 user_id: uid,
493 },
494 )
495 })
496 } else {
497 Some(tl::enums::MessageEntity::TextUrl(
498 tl::types::MessageEntityTextUrl {
499 offset: start_off,
500 length,
501 url,
502 },
503 ))
504 }
505 }
506 HtmlTag::CustomEmoji(id) => {
507 Some(tl::enums::MessageEntity::CustomEmoji(
508 tl::types::MessageEntityCustomEmoji {
509 offset: start_off,
510 length,
511 document_id: id,
512 },
513 ))
514 }
515 HtmlTag::Unknown => None,
516 };
517 if let Some(e) = entity {
518 ents.push(e);
519 }
520 }
521 }
522 } else {
523 let htag = match tag_name {
524 "b" | "strong" => HtmlTag::Bold,
525 "i" | "em" => HtmlTag::Italic,
526 "u" => HtmlTag::Underline,
527 "s" | "del" | "strike" => HtmlTag::Strike,
528 "tg-spoiler" => HtmlTag::Spoiler,
529 "code" => HtmlTag::Code,
530 "pre" => HtmlTag::Pre,
531 "a" => HtmlTag::Link(
532 attrs
533 .iter()
534 .find(|(k, _)| k == "href")
535 .map(|(_, v)| v.clone())
536 .unwrap_or_default(),
537 ),
538 "tg-emoji" => HtmlTag::CustomEmoji(
539 attrs
540 .iter()
541 .find(|(k, _)| k == "emoji-id")
542 .and_then(|(_, v)| v.parse::<i64>().ok())
543 .unwrap_or(0),
544 ),
545 "br" => {
546 out.push('\n');
547 utf16_off += 1;
548 continue;
549 }
550 _ => HtmlTag::Unknown,
551 };
552 stack.push((htag, utf16_off, None));
553 }
554 } else {
555 let text_start = i;
556 while i < len && bytes[i] != b'<' {
557 i += 1;
558 }
559 let decoded = decode_html_entities(&html[text_start..i]);
560 for ch in decoded.chars() {
561 out.push(ch);
562 utf16_off += ch.len_utf16() as i32;
563 }
564 }
565 }
566
567 (out, ents)
568}
569
#[cfg(not(feature = "html5ever"))]
/// Decodes the HTML character references `&amp;`, `&lt;`, `&gt;`, `&quot;`,
/// `&#39;` and `&nbsp;` in `s`.
///
/// The input is scanned once, so decoded output is never decoded again:
/// `&amp;lt;` becomes `&lt;`, not `<`. An `&` that does not start one of the
/// known references is copied through unchanged.
fn decode_html_entities(s: &str) -> String {
    const REFERENCES: [(&str, &str); 6] = [
        ("&amp;", "&"),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", "\""),
        ("&#39;", "'"),
        ("&nbsp;", "\u{00A0}"),
    ];

    let mut decoded = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        decoded.push_str(&rest[..amp]);
        rest = &rest[amp..];
        match REFERENCES.iter().find(|(name, _)| rest.starts_with(*name)) {
            Some((name, replacement)) => {
                decoded.push_str(replacement);
                rest = &rest[name.len()..];
            }
            None => {
                // Bare ampersand: keep it and continue after it.
                decoded.push('&');
                rest = &rest[1..];
            }
        }
    }
    decoded.push_str(rest);
    decoded
}
579
580#[cfg(not(feature = "html5ever"))]
581fn parse_tag(s: &str) -> (&str, Vec<(String, String)>) {
582 let mut parts = s.splitn(2, char::is_whitespace);
583 let name = parts.next().unwrap_or("").trim_end_matches('/');
584 let attrs = parse_attrs(parts.next().unwrap_or(""));
585 (name, attrs)
586}
587
#[cfg(not(feature = "html5ever"))]
/// Parses `key=value` attribute pairs from the attribute part of a tag.
///
/// Values may be double-quoted, single-quoted or bare (ending at the next
/// whitespace). A quoted value with no closing quote extends to the end of the
/// input. Parsing stops at the first point where no further `=` is found, so
/// trailing valueless attributes are ignored. Keys and values are returned
/// exactly as written, without entity decoding.
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut result = Vec::new();
    let mut rem = s.trim();
    while let Some(eq) = rem.find('=') {
        let key = rem[..eq].trim().to_string();
        rem = rem[eq + 1..].trim_start();
        let (val, rest) = match rem.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                // Both quote characters are one byte, so index 1 is a boundary.
                let body = &rem[1..];
                match body.find(quote) {
                    Some(end) => (&body[..end], &body[end + 1..]),
                    // Unterminated quote: take everything that is left.
                    None => (body, ""),
                }
            }
            _ => {
                let end = rem.find(char::is_whitespace).unwrap_or(rem.len());
                (&rem[..end], &rem[end..])
            }
        };
        result.push((key, val.to_string()));
        rem = rest.trim_start();
    }
    result
}
614
/// An opening tag recognised by the fallback `parse_html`, kept on its stack
/// until the matching closing tag is seen.
#[cfg(not(feature = "html5ever"))]
#[allow(dead_code)]
#[derive(Debug, Clone)]
enum HtmlTag {
    /// `<b>` or `<strong>`.
    Bold,
    /// `<i>` or `<em>`.
    Italic,
    /// `<u>`.
    Underline,
    /// `<s>`, `<del>` or `<strike>`.
    Strike,
    /// `<tg-spoiler>`.
    Spoiler,
    /// `<code>`.
    Code,
    /// `<pre>`.
    Pre,
    /// `<a>`, carrying the value of its `href` attribute (empty when absent).
    Link(String),
    /// `<tg-emoji>`, carrying the parsed `emoji-id` attribute (0 when absent or
    /// not a valid integer).
    CustomEmoji(i64),
    /// Any other tag.
    Unknown,
}
630
631#[cfg(not(feature = "html5ever"))]
632impl HtmlTag {
633 fn name(&self) -> &str {
634 match self {
635 Self::Bold => "b",
636 Self::Italic => "i",
637 Self::Underline => "u",
638 Self::Strike => "s",
639 Self::Spoiler => "tg-spoiler",
640 Self::Code => "code",
641 Self::Pre => "pre",
642 Self::Link(_) => "a",
643 Self::CustomEmoji(_) => "tg-emoji",
644 Self::Unknown => "",
645 }
646 }
647}
648
#[cfg(feature = "html5ever")]
#[cfg_attr(docsrs, doc(cfg(feature = "html5ever")))]
/// Parses HTML into plain text plus formatting entities using the `html5ever`
/// tokenizer.
///
/// Supported tags: `b`/`strong`, `i`/`em`, `u`, `s`/`del`/`strike`, `tg-spoiler`,
/// `code`, `pre` (optionally `<pre><code class="language-…">`), `a href="…"`
/// (a `tg://user?id=<n>` href produces a mention), `tg-emoji emoji-id="…"` and
/// `br`. Other tags are ignored and their text content is kept.
///
/// Entity offsets and lengths are counted in UTF-16 code units of the returned
/// text. Tags that are never closed, or that enclose no text, produce no entity.
pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    use html5ever::tendril::StrTendril;
    use html5ever::tokenizer::{
        BufferQueue, Tag, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer,
    };
    use std::cell::Cell;
    use tl::enums::MessageEntity as ME;

    /// Accumulates output while the tokenizer runs. `process_token` only gets
    /// `&self`, hence the `Cell`s.
    struct Sink {
        text: Cell<String>,
        entities: Cell<Vec<tl::enums::MessageEntity>>,
        offset: Cell<i32>,
    }

    impl TokenSink for Sink {
        type Handle = ();

        fn process_token(&self, token: Token, _line: u64) -> TokenSinkResult<()> {
            // Move the state out of the cells, work on it, and put it back below.
            let mut text = self.text.take();
            let mut entities = self.entities.take();
            let mut offset = self.offset.get();

            // An entity is pushed with length 0 when its tag opens; length 0 is
            // therefore the "still open" marker. Closing fixes the length of the
            // most recent *open* entity of the given kind, and drops it again if
            // it turned out to cover no text.
            macro_rules! close_ent {
                ($kind:ident) => {{
                    if let Some(idx) = entities.iter().rposition(|e| {
                        matches!(e, tl::enums::MessageEntity::$kind(inner) if inner.length == 0)
                    }) {
                        let closed_len = {
                            if let tl::enums::MessageEntity::$kind(ref mut inner) = entities[idx] {
                                inner.length = offset - inner.offset;
                                inner.length
                            } else {
                                unreachable!()
                            }
                        };
                        if closed_len == 0 {
                            entities.remove(idx);
                        }
                    }
                }};
            }

            match token {
                Token::TagToken(Tag {
                    kind: TagKind::StartTag,
                    name,
                    attrs,
                    ..
                }) => {
                    let len0 = 0i32;
                    match name.as_ref() {
                        "b" | "strong" => entities.push(tl::enums::MessageEntity::Bold(
                            tl::types::MessageEntityBold {
                                offset,
                                length: len0,
                            },
                        )),
                        "i" | "em" => entities.push(tl::enums::MessageEntity::Italic(
                            tl::types::MessageEntityItalic {
                                offset,
                                length: len0,
                            },
                        )),
                        "u" => entities.push(tl::enums::MessageEntity::Underline(
                            tl::types::MessageEntityUnderline {
                                offset,
                                length: len0,
                            },
                        )),
                        "s" | "del" | "strike" => entities.push(tl::enums::MessageEntity::Strike(
                            tl::types::MessageEntityStrike {
                                offset,
                                length: len0,
                            },
                        )),
                        "tg-spoiler" => entities.push(tl::enums::MessageEntity::Spoiler(
                            tl::types::MessageEntitySpoiler {
                                offset,
                                length: len0,
                            },
                        )),
                        "code" => {
                            // `<code>` directly after a still-open `<pre>` only
                            // supplies the language; it is not a separate entity.
                            let in_pre = entities.last().map_or(
                                false,
                                |e| matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0),
                            );
                            if in_pre {
                                let lang = attrs
                                    .iter()
                                    .find(|a| a.name.local.as_ref() == "class")
                                    .and_then(|a| {
                                        let v: &str = a.value.as_ref();
                                        v.strip_prefix("language-")
                                    })
                                    .map(|s| s.to_string())
                                    .unwrap_or_default();
                                if let Some(tl::enums::MessageEntity::Pre(ref mut p)) =
                                    entities.last_mut()
                                {
                                    p.language = lang;
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::Code(
                                    tl::types::MessageEntityCode {
                                        offset,
                                        length: len0,
                                    },
                                ));
                            }
                        }
                        "pre" => entities.push(tl::enums::MessageEntity::Pre(
                            tl::types::MessageEntityPre {
                                offset,
                                length: len0,
                                language: String::new(),
                            },
                        )),
                        "a" => {
                            let href = attrs
                                .iter()
                                .find(|a| a.name.local.as_ref() == "href")
                                .map(|a| {
                                    let v: &str = a.value.as_ref();
                                    v.to_string()
                                })
                                .unwrap_or_default();
                            const MENTION_PFX: &str = "tg://user?id=";
                            if let Some(id) = href.strip_prefix(MENTION_PFX) {
                                // A mention href with a non-numeric id yields nothing.
                                if let Ok(uid) = id.parse::<i64>() {
                                    entities.push(tl::enums::MessageEntity::MentionName(
                                        tl::types::MessageEntityMentionName {
                                            offset,
                                            length: len0,
                                            user_id: uid,
                                        },
                                    ));
                                }
                            } else {
                                entities.push(tl::enums::MessageEntity::TextUrl(
                                    tl::types::MessageEntityTextUrl {
                                        offset,
                                        length: len0,
                                        url: href,
                                    },
                                ));
                            }
                        }
                        "tg-emoji" => {
                            let doc_id = attrs
                                .iter()
                                .find(|a| a.name.local.as_ref() == "emoji-id")
                                .and_then(|a| {
                                    let v: &str = a.value.as_ref();
                                    v.parse::<i64>().ok()
                                })
                                .unwrap_or(0);
                            entities.push(tl::enums::MessageEntity::CustomEmoji(
                                tl::types::MessageEntityCustomEmoji {
                                    offset,
                                    length: len0,
                                    document_id: doc_id,
                                },
                            ));
                        }
                        "br" => {
                            text.push('\n');
                            offset += 1;
                        }
                        _ => {}
                    }
                }

                Token::TagToken(Tag {
                    kind: TagKind::EndTag,
                    name,
                    ..
                }) => {
                    match name.as_ref() {
                        "b" | "strong" => close_ent!(Bold),
                        "i" | "em" => close_ent!(Italic),
                        "u" => close_ent!(Underline),
                        "s" | "del" | "strike" => close_ent!(Strike),
                        "tg-spoiler" => close_ent!(Spoiler),
                        "code" => {
                            // Inside `<pre><code>` the `</code>` closes nothing.
                            let in_pre = entities.last().map_or(
                                false,
                                |e| matches!(e, tl::enums::MessageEntity::Pre(p) if p.length == 0),
                            );
                            if !in_pre {
                                close_ent!(Code);
                            }
                        }
                        "pre" => close_ent!(Pre),
                        "a" => {
                            // Close whichever link-like entity was opened most
                            // recently, even if other entities were pushed after it.
                            let open_is_mention = entities.iter().rev().find_map(|e| match e {
                                ME::MentionName(m) if m.length == 0 => Some(true),
                                ME::TextUrl(u) if u.length == 0 => Some(false),
                                _ => None,
                            });
                            match open_is_mention {
                                Some(true) => close_ent!(MentionName),
                                Some(false) => close_ent!(TextUrl),
                                None => {}
                            }
                        }
                        "tg-emoji" => close_ent!(CustomEmoji),
                        _ => {}
                    }
                }

                Token::CharacterTokens(s) => {
                    let s_str: &str = s.as_ref();
                    offset += s_str.encode_utf16().count() as i32;
                    text.push_str(s_str);
                }

                _ => {}
            }

            self.text.replace(text);
            self.entities.replace(entities);
            self.offset.replace(offset);
            TokenSinkResult::Continue
        }
    }

    let mut input = BufferQueue::default();
    input.push_back(StrTendril::from_slice(html).try_reinterpret().unwrap());

    let tok = Tokenizer::new(
        Sink {
            text: Cell::new(String::with_capacity(html.len())),
            entities: Cell::new(Vec::new()),
            offset: Cell::new(0),
        },
        Default::default(),
    );
    let _ = tok.feed(&mut input);
    tok.end();

    let Sink { text, entities, .. } = tok.sink;
    let mut entities = entities.take();
    // Anything still at length 0 belongs to a tag that was never closed.
    entities.retain(|e| match e {
        ME::Bold(x) => x.length > 0,
        ME::Italic(x) => x.length > 0,
        ME::Underline(x) => x.length > 0,
        ME::Strike(x) => x.length > 0,
        ME::Spoiler(x) => x.length > 0,
        ME::Code(x) => x.length > 0,
        ME::Pre(x) => x.length > 0,
        ME::TextUrl(x) => x.length > 0,
        ME::MentionName(x) => x.length > 0,
        ME::CustomEmoji(x) => x.length > 0,
        _ => true,
    });
    (text.take(), entities)
}
902
903pub fn generate_html(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
907 use tl::enums::MessageEntity as ME;
908
909 let mut markers: Vec<(i32, bool, String)> = Vec::new();
910
911 for ent in entities {
912 let (off, len, open, close) = match ent {
913 ME::Bold(e) => (e.offset, e.length, "<b>".into(), "</b>".into()),
914 ME::Italic(e) => (e.offset, e.length, "<i>".into(), "</i>".into()),
915 ME::Underline(e) => (e.offset, e.length, "<u>".into(), "</u>".into()),
916 ME::Strike(e) => (e.offset, e.length, "<s>".into(), "</s>".into()),
917 ME::Spoiler(e) => (
918 e.offset,
919 e.length,
920 "<tg-spoiler>".into(),
921 "</tg-spoiler>".into(),
922 ),
923 ME::Code(e) => (e.offset, e.length, "<code>".into(), "</code>".into()),
924 ME::Pre(e) => {
925 let lang = if e.language.is_empty() {
926 String::new()
927 } else {
928 format!(" class=\"language-{}\"", e.language)
929 };
930 (
931 e.offset,
932 e.length,
933 format!("<pre><code{lang}>"),
934 "</code></pre>".into(),
935 )
936 }
937 ME::TextUrl(e) => (
938 e.offset,
939 e.length,
940 format!("<a href=\"{}\">", escape_html(&e.url)),
941 "</a>".into(),
942 ),
943 ME::MentionName(e) => (
944 e.offset,
945 e.length,
946 format!("<a href=\"tg://user?id={}\">", e.user_id),
947 "</a>".into(),
948 ),
949 ME::CustomEmoji(e) => (
950 e.offset,
951 e.length,
952 format!("<tg-emoji emoji-id=\"{}\">", e.document_id),
953 "</tg-emoji>".into(),
954 ),
955 _ => continue,
956 };
957 markers.push((off, true, open));
958 markers.push((off + len, false, close));
959 }
960
961 markers.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
962 a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
963 });
964
965 let mut result =
966 String::with_capacity(text.len() + markers.iter().map(|(_, _, s)| s.len()).sum::<usize>());
967 let mut marker_idx = 0;
968 let mut utf16_pos: i32 = 0;
969
970 for ch in text.chars() {
971 while marker_idx < markers.len() && markers[marker_idx].0 <= utf16_pos {
972 result.push_str(&markers[marker_idx].2);
973 marker_idx += 1;
974 }
975 match ch {
976 '&' => result.push_str("&"),
977 '<' => result.push_str("<"),
978 '>' => result.push_str(">"),
979 '"' => result.push_str("""),
980 c => result.push(c),
981 }
982 utf16_pos += ch.len_utf16() as i32;
983 }
984 while marker_idx < markers.len() {
985 result.push_str(&markers[marker_idx].2);
986 marker_idx += 1;
987 }
988
989 result
990}
991
/// Escapes `&`, `<`, `>` and `"` so that `s` can be placed in HTML text or inside
/// a double-quoted attribute value.
///
/// The input is processed in a single pass, so the `&` of an inserted character
/// reference is never escaped a second time.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
998
#[cfg(test)]
mod tests {
    //! Unit tests for the Markdown and HTML parsers and generators.

    use super::*;

    #[test]
    fn markdown_bold() {
        let (text, ents) = parse_markdown("Hello **world**!");
        assert_eq!(text, "Hello world!");
        assert_eq!(ents.len(), 1);
        if let tl::enums::MessageEntity::Bold(b) = &ents[0] {
            assert_eq!(b.offset, 6);
            assert_eq!(b.length, 5);
        } else {
            panic!("expected bold");
        }
    }

    #[test]
    fn markdown_bold_single_asterisk() {
        let (text, ents) = parse_markdown("*bold*");
        assert_eq!(text, "bold");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Bold(_)));
    }

    #[test]
    fn markdown_italic_double_underscore() {
        let (text, ents) = parse_markdown("__italic__");
        assert_eq!(text, "italic");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Italic(_)));
    }

    #[test]
    fn markdown_italic_single_underscore() {
        let (text, ents) = parse_markdown("_italic_");
        assert_eq!(text, "italic");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Italic(_)));
    }

    #[test]
    fn markdown_inline_code() {
        let (text, ents) = parse_markdown("Use `foo()` to do it");
        assert_eq!(text, "Use foo() to do it");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Code(_)));
    }

    #[test]
    fn markdown_code_block_with_lang() {
        let (text, ents) = parse_markdown("```rust\nfn main() {}\n```");
        assert_eq!(text, "fn main() {}");
        if let tl::enums::MessageEntity::Pre(p) = &ents[0] {
            assert_eq!(p.language, "rust");
            assert_eq!(p.offset, 0);
        } else {
            panic!("expected pre");
        }
    }

    #[test]
    fn markdown_code_block_no_lang() {
        let (text, ents) = parse_markdown("```\nhello\n```");
        assert_eq!(text, "hello");
        if let tl::enums::MessageEntity::Pre(p) = &ents[0] {
            assert_eq!(p.language, "");
        } else {
            panic!("expected pre");
        }
    }

    #[test]
    fn markdown_strike() {
        let (text, ents) = parse_markdown("~~strike~~");
        assert_eq!(text, "strike");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Strike(_)));
    }

    #[test]
    fn markdown_spoiler() {
        let (text, ents) = parse_markdown("||spoiler||");
        assert_eq!(text, "spoiler");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Spoiler(_)));
    }

    #[test]
    fn markdown_text_url() {
        let (text, ents) = parse_markdown("[click](https://example.com)");
        assert_eq!(text, "click");
        if let tl::enums::MessageEntity::TextUrl(e) = &ents[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("expected text url");
        }
    }

    #[test]
    fn markdown_mention() {
        let (text, ents) = parse_markdown("[User](tg://user?id=42)");
        assert_eq!(text, "User");
        if let tl::enums::MessageEntity::MentionName(e) = &ents[0] {
            assert_eq!(e.user_id, 42);
        } else {
            panic!("expected mention name");
        }
    }

    #[test]
    fn markdown_custom_emoji() {
        let (text, ents) = parse_markdown("![👍](tg://emoji?id=5368324170671202286)");
        assert_eq!(text, "👍");
        if let tl::enums::MessageEntity::CustomEmoji(e) = &ents[0] {
            assert_eq!(e.document_id, 5368324170671202286);
        } else {
            panic!("expected custom emoji");
        }
    }

    #[test]
    fn markdown_backslash_escape() {
        let (text, ents) = parse_markdown(r"\*not bold\*");
        assert_eq!(text, "*not bold*");
        assert!(ents.is_empty());
    }

    #[test]
    fn markdown_nested() {
        let (text, ents) = parse_markdown("**bold __italic__ end**");
        assert_eq!(text, "bold italic end");
        assert_eq!(ents.len(), 2);
        assert!(
            ents.iter()
                .any(|e| matches!(e, tl::enums::MessageEntity::Bold(_)))
        );
        assert!(
            ents.iter()
                .any(|e| matches!(e, tl::enums::MessageEntity::Italic(_)))
        );
    }

    #[test]
    fn generate_markdown_pre() {
        let entities = vec![tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
            offset: 0,
            length: 12,
            language: "rust".into(),
        })];
        let md = generate_markdown("fn main() {}", &entities);
        assert_eq!(md, "```rust\nfn main() {}\n```");
    }

    #[test]
    fn generate_markdown_text_url() {
        let entities = vec![tl::enums::MessageEntity::TextUrl(
            tl::types::MessageEntityTextUrl {
                offset: 0,
                length: 5,
                url: "https://example.com".into(),
            },
        )];
        let md = generate_markdown("click", &entities);
        assert_eq!(md, "[click](https://example.com)");
    }

    #[test]
    fn generate_markdown_mention() {
        let entities = vec![tl::enums::MessageEntity::MentionName(
            tl::types::MessageEntityMentionName {
                offset: 0,
                length: 4,
                user_id: 99,
            },
        )];
        let md = generate_markdown("User", &entities);
        assert_eq!(md, "[User](tg://user?id=99)");
    }

    #[test]
    fn generate_markdown_custom_emoji() {
        let entities = vec![tl::enums::MessageEntity::CustomEmoji(
            tl::types::MessageEntityCustomEmoji {
                offset: 0,
                length: 2,
                document_id: 123456,
            },
        )];
        let md = generate_markdown("👍", &entities);
        assert_eq!(md, "![👍](tg://emoji?id=123456)");
    }

    #[test]
    fn generate_markdown_escapes_special_chars() {
        let (_, empty): (_, Vec<_>) = (String::new(), vec![]);
        let md = generate_markdown("1 * 2 = 2", &empty);
        assert_eq!(md, r"1 \* 2 = 2");
    }

    #[test]
    fn markdown_roundtrip_url() {
        let original = "click";
        let entities = vec![tl::enums::MessageEntity::TextUrl(
            tl::types::MessageEntityTextUrl {
                offset: 0,
                length: 5,
                url: "https://example.com".into(),
            },
        )];
        let md = generate_markdown(original, &entities);
        let (back, ents2) = parse_markdown(&md);
        assert_eq!(back, original);
        if let tl::enums::MessageEntity::TextUrl(e) = &ents2[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("roundtrip url failed");
        }
    }

    #[test]
    fn html_bold_italic() {
        let (text, ents) = parse_html("<b>bold</b> and <i>italic</i>");
        assert_eq!(text, "bold and italic");
        assert_eq!(ents.len(), 2);
    }

    #[test]
    fn html_link() {
        let (text, ents) = parse_html("<a href=\"https://example.com\">click</a>");
        assert_eq!(text, "click");
        if let tl::enums::MessageEntity::TextUrl(e) = &ents[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("expected text url");
        }
    }

    // Exercises the fallback parser's own entity decoding.
    #[cfg(not(feature = "html5ever"))]
    #[test]
    fn html_entities_decoded() {
        let (text, _) = parse_html("A &amp; B &lt;3&gt;");
        assert_eq!(text, "A & B <3>");
    }

    #[test]
    fn generate_html_roundtrip() {
        let original = "Hello world";
        let entities = vec![tl::enums::MessageEntity::Bold(
            tl::types::MessageEntityBold {
                offset: 0,
                length: 5,
            },
        )];
        let html = generate_html(original, &entities);
        assert_eq!(html, "<b>Hello</b> world");
        let (back, ents2) = parse_html(&html);
        assert_eq!(back, original);
        assert_eq!(ents2.len(), 1);
    }
}