1use layer_tl_types as tl;
13
14pub fn parse_markdown(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
27 let mut out = String::with_capacity(text.len());
28 let mut ents = Vec::new();
29 let chars: Vec<char> = text.chars().collect();
30 let n = chars.len();
31 let mut i = 0;
32
33 let mut open_stack: Vec<(MarkdownTag, i32)> = Vec::new();
36
37 let mut utf16_off: i32 = 0;
39
40 macro_rules! push_char {
41 ($c:expr) => {{
42 let c: char = $c;
43 out.push(c);
44 utf16_off += c.len_utf16() as i32;
45 }};
46 }
47
48 while i < n {
49 if i + 2 < n && chars[i] == '`' && chars[i+1] == '`' && chars[i+2] == '`' {
51 let start = i + 3;
52 let mut j = start;
54 while j + 2 < n {
55 if chars[j] == '`' && chars[j+1] == '`' && chars[j+2] == '`' { break; }
56 j += 1;
57 }
58 if j + 2 < n {
59 let block: String = chars[start..j].iter().collect();
61 let (lang, code) = if let Some(nl) = block.find('\n') {
62 (block[..nl].trim().to_string(), block[nl+1..].to_string())
63 } else {
64 (String::new(), block)
65 };
66 let code_off = utf16_off;
67 let code_utf16: i32 = code.encode_utf16().count() as i32;
68 ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
69 offset: code_off, length: code_utf16, language: lang,
70 }));
71 for c in code.chars() { push_char!(c); }
72 i = j + 3;
73 continue;
74 }
75 }
76
77 if chars[i] == '`' {
79 let start = i + 1;
80 let mut j = start;
81 while j < n && chars[j] != '`' { j += 1; }
82 if j < n {
83 let code: String = chars[start..j].iter().collect();
84 let code_off = utf16_off;
85 let code_utf16: i32 = code.encode_utf16().count() as i32;
86 ents.push(tl::enums::MessageEntity::Code(tl::types::MessageEntityCode {
87 offset: code_off, length: code_utf16,
88 }));
89 for c in code.chars() { push_char!(c); }
90 i = j + 1;
91 continue;
92 }
93 }
94
95 if chars[i] == '[' {
97 let text_start = i + 1;
98 let mut j = text_start;
99 let mut depth = 1i32;
100 while j < n {
101 if chars[j] == '[' { depth += 1; }
102 if chars[j] == ']' { depth -= 1; if depth == 0 { break; } }
103 j += 1;
104 }
105 if j < n && j + 1 < n && chars[j+1] == '(' {
106 let link_start = j + 2;
107 let mut k = link_start;
108 while k < n && chars[k] != ')' { k += 1; }
109 if k < n {
110 let inner_text: String = chars[text_start..j].iter().collect();
111 let url: String = chars[link_start..k].iter().collect();
112 const MENTION_PFX: &str = "tg://user?id=";
113 let ent_off = utf16_off;
114 for c in inner_text.chars() { push_char!(c); }
115 let ent_len = utf16_off - ent_off;
116 if url.starts_with(MENTION_PFX) {
117 if let Ok(uid) = url[MENTION_PFX.len()..].parse::<i64>() {
118 ents.push(tl::enums::MessageEntity::MentionName(
119 tl::types::MessageEntityMentionName { offset: ent_off, length: ent_len, user_id: uid }
120 ));
121 }
122 } else {
123 ents.push(tl::enums::MessageEntity::TextUrl(
124 tl::types::MessageEntityTextUrl { offset: ent_off, length: ent_len, url }
125 ));
126 }
127 i = k + 1;
128 continue;
129 }
130 }
131 }
132
133 let two: Option<(&str, MarkdownTag)> = if i + 1 < n {
135 let pair = [chars[i], chars[i+1]];
136 match pair {
137 ['*','*'] => Some(("**", MarkdownTag::Bold)),
138 ['_','_'] => Some(("__", MarkdownTag::Italic)),
139 ['~','~'] => Some(("~~", MarkdownTag::Strike)),
140 ['|','|'] => Some(("||", MarkdownTag::Spoiler)),
141 _ => None,
142 }
143 } else { None };
144
145 if let Some((_delim, tag)) = two {
146 if let Some(pos) = open_stack.iter().rposition(|(t, _)| *t == tag) {
148 let (_, start_off) = open_stack.remove(pos);
149 let length = utf16_off - start_off;
150 let entity = match tag {
151 MarkdownTag::Bold => tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset: start_off, length }),
152 MarkdownTag::Italic => tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset: start_off, length }),
153 MarkdownTag::Strike => tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset: start_off, length }),
154 MarkdownTag::Spoiler => tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset: start_off, length }),
155 };
156 if length > 0 { ents.push(entity); }
157 } else {
158 open_stack.push((tag, utf16_off));
159 }
160 i += 2;
161 continue;
162 }
163
164 push_char!(chars[i]);
166 i += 1;
167 }
168
169 (out, ents)
170}
171
/// Paired two-character Markdown delimiters handled by `parse_markdown`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkdownTag {
    /// `**…**`
    Bold,
    /// `__…__`
    Italic,
    /// `~~…~~`
    Strike,
    /// `||…||`
    Spoiler,
}
174
175pub fn generate_markdown(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
177 use tl::enums::MessageEntity as ME;
178
179 let mut insertions: Vec<(i32, &'static str)> = Vec::new();
181
182 for ent in entities {
183 match ent {
184 ME::Bold(e) => { insertions.push((e.offset, "**")); insertions.push((e.offset+e.length, "**")); }
185 ME::Italic(e) => { insertions.push((e.offset, "__")); insertions.push((e.offset+e.length, "__")); }
186 ME::Strike(e) => { insertions.push((e.offset, "~~")); insertions.push((e.offset+e.length, "~~")); }
187 ME::Spoiler(e) => { insertions.push((e.offset, "||")); insertions.push((e.offset+e.length, "||")); }
188 ME::Code(e) => { insertions.push((e.offset, "`")); insertions.push((e.offset+e.length, "`")); }
189 _ => {} }
191 }
192 insertions.sort_by_key(|&(pos, _)| pos);
193
194 let mut result = String::with_capacity(text.len() + insertions.len() * 4);
196 let mut ins_idx = 0;
197 let mut utf16_pos: i32 = 0;
198
199 for ch in text.chars() {
200 while ins_idx < insertions.len() && insertions[ins_idx].0 <= utf16_pos {
201 result.push_str(insertions[ins_idx].1);
202 ins_idx += 1;
203 }
204 result.push(ch);
205 utf16_pos += ch.len_utf16() as i32;
206 }
207 while ins_idx < insertions.len() {
208 result.push_str(insertions[ins_idx].1);
209 ins_idx += 1;
210 }
211
212 for ent in entities {
214 match ent {
215 tl::enums::MessageEntity::Pre(_) | tl::enums::MessageEntity::TextUrl(_) |
216 tl::enums::MessageEntity::MentionName(_) => {
217 }
220 _ => {}
221 }
222 }
223
224 result
225}
226
227pub fn parse_html(html: &str) -> (String, Vec<tl::enums::MessageEntity>) {
242 let mut out = String::with_capacity(html.len());
243 let mut ents = Vec::new();
244 let mut stack: Vec<(HtmlTag, i32, Option<String>)> = Vec::new();
246 let mut utf16_off: i32 = 0;
247
248 let bytes = html.as_bytes();
249 let len = bytes.len();
250 let mut i = 0;
251
252 while i < len {
253 if bytes[i] == b'<' {
254 let tag_start = i + 1;
256 let mut j = tag_start;
257 while j < len && bytes[j] != b'>' { j += 1; }
258 let tag_content = &html[tag_start..j];
259 i = j + 1;
260
261 let is_close = tag_content.starts_with('/');
262 let tag_str = if is_close { tag_content[1..].trim() } else { tag_content.trim() };
263
264 let (tag_name, attrs) = parse_tag(tag_str);
266
267 if is_close {
268 if let Some(pos) = stack.iter().rposition(|(t, _, _)| t.name() == tag_name) {
270 let (htag, start_off, extra) = stack.remove(pos);
271 let length = utf16_off - start_off;
272 if length > 0 {
273 let entity = match htag {
274 HtmlTag::Bold => Some(tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold { offset: start_off, length })),
275 HtmlTag::Italic => Some(tl::enums::MessageEntity::Italic(tl::types::MessageEntityItalic { offset: start_off, length })),
276 HtmlTag::Underline => Some(tl::enums::MessageEntity::Underline(tl::types::MessageEntityUnderline { offset: start_off, length })),
277 HtmlTag::Strike => Some(tl::enums::MessageEntity::Strike(tl::types::MessageEntityStrike { offset: start_off, length })),
278 HtmlTag::Spoiler => Some(tl::enums::MessageEntity::Spoiler(tl::types::MessageEntitySpoiler { offset: start_off, length })),
279 HtmlTag::Code => {
280 Some(tl::enums::MessageEntity::Code(tl::types::MessageEntityCode { offset: start_off, length }))
282 }
283 HtmlTag::Pre => {
284 let lang = extra.unwrap_or_default();
285 Some(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre { offset: start_off, length, language: lang }))
286 }
287 HtmlTag::Link(url) => {
288 const PFX: &str = "tg://user?id=";
289 if url.starts_with(PFX) {
290 if let Ok(uid) = url[PFX.len()..].parse::<i64>() {
291 Some(tl::enums::MessageEntity::MentionName(tl::types::MessageEntityMentionName { offset: start_off, length, user_id: uid }))
292 } else { None }
293 } else {
294 Some(tl::enums::MessageEntity::TextUrl(tl::types::MessageEntityTextUrl { offset: start_off, length, url }))
295 }
296 }
297 HtmlTag::CustomEmoji(id) => {
298 Some(tl::enums::MessageEntity::CustomEmoji(tl::types::MessageEntityCustomEmoji { offset: start_off, length, document_id: id }))
299 }
300 HtmlTag::Unknown => None,
301 };
302 if let Some(e) = entity { ents.push(e); }
303 }
304 }
305 } else {
306 let htag = match tag_name {
308 "b" | "strong" => HtmlTag::Bold,
309 "i" | "em" => HtmlTag::Italic,
310 "u" => HtmlTag::Underline,
311 "s" | "del" | "strike" => HtmlTag::Strike,
312 "tg-spoiler" => HtmlTag::Spoiler,
313 "code" => HtmlTag::Code,
314 "pre" => HtmlTag::Pre,
315 "a" => {
316 let href = attrs.iter()
317 .find(|(k, _)| k == "href")
318 .map(|(_, v)| v.clone())
319 .unwrap_or_default();
320 HtmlTag::Link(href)
321 }
322 "tg-emoji" => {
323 let id = attrs.iter()
324 .find(|(k, _)| k == "emoji-id")
325 .and_then(|(_, v)| v.parse::<i64>().ok())
326 .unwrap_or(0);
327 HtmlTag::CustomEmoji(id)
328 }
329 "br" => {
330 out.push('\n');
332 utf16_off += 1;
333 continue;
334 }
335 _ => HtmlTag::Unknown,
336 };
337 stack.push((htag, utf16_off, None));
338 }
339 } else {
340 let text_start = i;
342 while i < len && bytes[i] != b'<' { i += 1; }
343 let raw_text = &html[text_start..i];
344 let decoded = decode_html_entities(raw_text);
345 for ch in decoded.chars() {
346 out.push(ch);
347 utf16_off += ch.len_utf16() as i32;
348 }
349 }
350 }
351
352 (out, ents)
353}
354
/// Decodes HTML character references in `s` in a single left-to-right pass.
///
/// Supported: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` and numeric
/// references (`&#39;`, `&#x27;`). Anything else, including a bare `&`, is
/// copied through unchanged.
///
/// Each reference is decoded exactly once, so `&amp;lt;` becomes the text
/// `&lt;` rather than `<`.
fn decode_html_entities(s: &str) -> String {
    /// Longest reference body (between `&` and `;`) that is considered.
    const MAX_NAME_LEN: usize = 12;

    /// Maps the text between `&` and `;` to the character it denotes.
    fn resolve(name: &str) -> Option<char> {
        match name {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            "nbsp" => Some('\u{00A0}'),
            _ => {
                let num = name.strip_prefix('#')?;
                let (digits, radix) = match num.strip_prefix('x').or_else(|| num.strip_prefix('X')) {
                    Some(hex) => (hex, 16),
                    None => (num, 10),
                };
                if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
                    return None;
                }
                let code = u32::from_str_radix(digits, radix).ok()?;
                char::from_u32(code).filter(|&c| c != '\0')
            }
        }
    }

    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        // Only look a few bytes ahead for the terminating ';' so input with
        // many stray '&' stays linear. ';' is ASCII, so the byte index found
        // here is always a valid char boundary.
        let decoded = after
            .bytes()
            .take(MAX_NAME_LEN)
            .position(|b| b == b';')
            .and_then(|semi| resolve(&after[..semi]).map(|ch| (ch, semi)));

        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &after[semi + 1..];
            }
            None => {
                // Not a reference we understand: keep the '&' literally.
                out.push('&');
                rest = after;
            }
        }
    }

    out.push_str(rest);
    out
}
363
364fn parse_tag(s: &str) -> (&str, Vec<(String, String)>) {
365 let mut parts = s.splitn(2, char::is_whitespace);
366 let name = parts.next().unwrap_or("").trim_end_matches('/');
367 let rest = parts.next().unwrap_or("");
368 let attrs = parse_attrs(rest);
369 (name, attrs)
370}
371
/// Parses `key=value` attribute pairs from the attribute part of a tag.
///
/// Values may be double-quoted, single-quoted or unquoted (terminated by
/// whitespace). Returned keys are ASCII-lowercased; values are returned
/// verbatim, without entity decoding.
///
/// - A quoted value with no closing quote extends to the end of the input.
/// - Value-less (boolean) attributes are skipped: for `download href=x` the
///   result is `[("href", "x")]`.
/// - Parsing stops at the first point where no further `=` is found.
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut result = Vec::new();
    let mut rem = s.trim();

    while let Some(eq) = rem.find('=') {
        // Only the last whitespace-separated token before '=' is the key;
        // anything earlier is a value-less attribute.
        let key = rem[..eq]
            .split_whitespace()
            .next_back()
            .unwrap_or("")
            .to_ascii_lowercase();

        let after = rem[eq + 1..].trim_start();
        let (val, rest) = match after.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                // Both quote characters are one byte, so index 1 is a boundary.
                let body = &after[1..];
                match body.find(quote) {
                    Some(end) => (&body[..end], &body[end + 1..]),
                    // Unterminated quote: take everything that is left.
                    None => (body, ""),
                }
            }
            _ => {
                let end = after.find(char::is_whitespace).unwrap_or(after.len());
                (&after[..end], &after[end..])
            }
        };

        result.push((key, val.to_string()));
        rem = rest.trim_start();
    }

    result
}
398
/// An HTML element tracked on the open-tag stack while parsing.
#[allow(dead_code)]
#[derive(Debug, Clone)]
enum HtmlTag {
    /// `<b>` / `<strong>`
    Bold,
    /// `<i>` / `<em>`
    Italic,
    /// `<u>`
    Underline,
    /// `<s>` / `<del>` / `<strike>`
    Strike,
    /// `<tg-spoiler>`
    Spoiler,
    /// `<code>`
    Code,
    /// `<pre>`
    Pre,
    /// `<a>`, carrying the `href` value.
    Link(String),
    /// `<tg-emoji>`, carrying the `emoji-id` value.
    CustomEmoji(i64),
    /// Any tag that is not recognised.
    Unknown,
}

impl HtmlTag {
    /// Returns the canonical tag name used to pair a closing tag with its
    /// opener. `Unknown` maps to the empty string.
    fn name(&self) -> &str {
        match self {
            HtmlTag::Unknown => "",
            HtmlTag::Bold => "b",
            HtmlTag::Italic => "i",
            HtmlTag::Underline => "u",
            HtmlTag::Strike => "s",
            HtmlTag::Code => "code",
            HtmlTag::Pre => "pre",
            HtmlTag::Spoiler => "tg-spoiler",
            HtmlTag::Link(_) => "a",
            HtmlTag::CustomEmoji(_) => "tg-emoji",
        }
    }
}
430
431pub fn generate_html(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
433 use tl::enums::MessageEntity as ME;
434
435 let mut markers: Vec<(i32, bool, String)> = Vec::new();
437
438 for ent in entities {
439 let (off, len, open, close) = match ent {
440 ME::Bold(e) => (e.offset, e.length, "<b>".into(), "</b>".into()),
441 ME::Italic(e) => (e.offset, e.length, "<i>".into(), "</i>".into()),
442 ME::Underline(e) => (e.offset, e.length, "<u>".into(), "</u>".into()),
443 ME::Strike(e) => (e.offset, e.length, "<s>".into(), "</s>".into()),
444 ME::Spoiler(e) => (e.offset, e.length, "<tg-spoiler>".into(), "</tg-spoiler>".into()),
445 ME::Code(e) => (e.offset, e.length, "<code>".into(), "</code>".into()),
446 ME::Pre(e) => {
447 let lang = if e.language.is_empty() { String::new() }
448 else { format!(" class=\"language-{}\"", e.language) };
449 (e.offset, e.length, format!("<pre><code{lang}>"), "</code></pre>".into())
450 }
451 ME::TextUrl(e) => (e.offset, e.length, format!("<a href=\"{}\">", escape_html(&e.url)), "</a>".into()),
452 ME::MentionName(e) => (e.offset, e.length, format!("<a href=\"tg://user?id={}\">", e.user_id), "</a>".into()),
453 ME::CustomEmoji(e) => (e.offset, e.length, format!("<tg-emoji emoji-id=\"{}\">", e.document_id), "</tg-emoji>".into()),
454 _ => continue,
455 };
456 markers.push((off, true, open));
457 markers.push((off + len, false, close));
458 }
459
460 markers.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
462 a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open)) });
464
465 let mut result = String::with_capacity(text.len() + markers.iter().map(|(_, _, s)| s.len()).sum::<usize>());
466 let mut marker_idx = 0;
467 let mut utf16_pos: i32 = 0;
468
469 for ch in text.chars() {
470 while marker_idx < markers.len() && markers[marker_idx].0 <= utf16_pos {
471 result.push_str(&markers[marker_idx].2);
472 marker_idx += 1;
473 }
474 match ch {
476 '&' => result.push_str("&"),
477 '<' => result.push_str("<"),
478 '>' => result.push_str(">"),
479 '"' => result.push_str("""),
480 c => result.push(c),
481 }
482 utf16_pos += ch.len_utf16() as i32;
483 }
484 while marker_idx < markers.len() {
485 result.push_str(&markers[marker_idx].2);
486 marker_idx += 1;
487 }
488
489 result
490}
491
/// Escapes `&`, `<`, `>` and `"` so `s` can be embedded in HTML text or in a
/// double-quoted attribute value.
///
/// Works in a single pass, so the `&` introduced by one replacement is never
/// escaped a second time.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            other => out.push(other),
        }
    }
    out
}
495
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn markdown_bold() {
        let (text, ents) = parse_markdown("Hello **world**!");
        assert_eq!(text, "Hello world!");
        assert_eq!(ents.len(), 1);
        if let tl::enums::MessageEntity::Bold(b) = &ents[0] {
            assert_eq!(b.offset, 6);
            assert_eq!(b.length, 5);
        } else {
            panic!("expected bold");
        }
    }

    #[test]
    fn markdown_inline_code() {
        let (text, ents) = parse_markdown("Use `foo()` to do it");
        assert_eq!(text, "Use foo() to do it");
        assert!(matches!(ents[0], tl::enums::MessageEntity::Code(_)));
    }

    #[test]
    fn html_bold_italic() {
        let (text, ents) = parse_html("<b>bold</b> and <i>italic</i>");
        assert_eq!(text, "bold and italic");
        assert_eq!(ents.len(), 2);
    }

    #[test]
    fn html_link() {
        let (text, ents) = parse_html("<a href=\"https://example.com\">click</a>");
        assert_eq!(text, "click");
        if let tl::enums::MessageEntity::TextUrl(e) = &ents[0] {
            assert_eq!(e.url, "https://example.com");
        } else {
            panic!("expected text url");
        }
    }

    // The input must use character references: a raw "<3>" would be consumed
    // as a tag and never reach the output text.
    #[test]
    fn html_entities_decoded() {
        let (text, _) = parse_html("A &amp; B &lt;3&gt;");
        assert_eq!(text, "A & B <3>");
    }

    #[test]
    fn generate_html_roundtrip() {
        let original = "Hello world";
        let entities = vec![tl::enums::MessageEntity::Bold(tl::types::MessageEntityBold {
            offset: 0,
            length: 5,
        })];
        let html = generate_html(original, &entities);
        assert_eq!(html, "<b>Hello</b> world");
        let (back, ents2) = parse_html(&html);
        assert_eq!(back, original);
        assert_eq!(ents2.len(), 1);
    }
}