/// Characters that receive a preceding backslash when they occur in plain
/// text. The backslash itself is part of the set.
///
/// Every entry is ASCII; the `TG_SPECIAL` lookup table relies on that when it
/// indexes a 128-slot array by code point.
const TG_SPECIAL_CHARS: &[char] = &[
    // emphasis markers
    '_', '*', '~', '|',
    // link / grouping punctuation
    '[', ']', '(', ')', '{', '}',
    // code marker
    '`',
    // remaining reserved punctuation
    '>', '#', '+', '-', '=', '.', '!',
    // the escape character itself
    '\\',
];
7
8const TG_SPECIAL: [bool; 128] = {
10 let mut table = [false; 128];
11 let mut i = 0;
12 while i < TG_SPECIAL_CHARS.len() {
13 table[TG_SPECIAL_CHARS[i] as usize] = true;
14 i += 1;
15 }
16 table
17};
18
19fn is_tg_special(c: char) -> bool {
21 let code = c as u32;
22 code < 128 && TG_SPECIAL[code as usize]
23}
24
/// Appends `c` to `out` using the escaping rules for code content: only the
/// backtick and the backslash get a preceding backslash, every other
/// character is copied verbatim.
fn push_code_escaped(out: &mut String, c: char) {
    match c {
        '`' => out.push_str("\\`"),
        '\\' => out.push_str("\\\\"),
        other => out.push(other),
    }
}
32
/// Locates the end of a fenced code block.
///
/// `after_opening` is the text that follows the opening "```". The opening
/// line (for example a language tag) must be terminated by a newline. The
/// closing fence is a "```" that starts a line and is followed by the end of
/// the input or by a line break (`\n` or `\r\n`); a "```" followed by other
/// text is treated as code content and the search continues.
///
/// Returns the byte offset just past the closing fence, relative to
/// `after_opening`, or `None` when there is no opening newline or no
/// acceptable closing fence.
fn find_code_block_end(after_opening: &str) -> Option<usize> {
    const FENCE: &str = "\n```";

    // The search starts at the newline that ends the opening line so that an
    // empty block ("```\n```") is matched by that very newline.
    let mut search_from = after_opening.find('\n')?;

    loop {
        let pos = after_opening[search_from..].find(FENCE)?;
        let end = search_from + pos + FENCE.len();
        let tail = &after_opening[end..];

        // Accept CRLF as well as LF after the fence; otherwise text with
        // Windows line endings would never close a block that is followed by
        // more content.
        if tail.is_empty() || tail.starts_with('\n') || tail.starts_with("\r\n") {
            return Some(end);
        }

        // Not a real closing fence: resume just past the newline that started
        // this candidate. `tail` is non-empty here, so the offset stays in
        // bounds.
        search_from += pos + 1;
    }
}
52
53fn find_closing(content: &str, delim: &str) -> Option<usize> {
61 let mut i = 0;
62
63 while i < content.len() {
64 let ch = content[i..].chars().next().unwrap();
65
66 if ch == '\\'
68 && let Some(next_ch) = content.get(i + 1..).and_then(|s| s.chars().next())
69 && is_tg_special(next_ch)
70 {
71 i += 1 + next_ch.len_utf8();
72 continue;
73 }
74
75 if content[i..].starts_with("```")
77 && let Some(end) = find_code_block_end(&content[i + 3..])
78 {
79 i += 3 + end;
80 continue;
81 }
82
83 if ch == '`'
85 && let Some(pos) = content[i + 1..].find('`')
86 {
87 i += pos + 2; continue;
89 }
90
91 if content[i..].starts_with(delim) {
93 return Some(i);
94 }
95
96 i += ch.len_utf8();
97 }
98
99 None
100}
101
/// Extra acceptance rule attached to an inline delimiter.
///
/// `Debug` is derived so that guard values can be printed and compared with
/// `assert_eq!` / `assert_ne!` in diagnostics and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DelimiterGuard {
    /// No additional check: any opening/closing pair is accepted.
    None,
    /// The opening delimiter is rejected when the text right after it starts
    /// with the delimiter's first character again.
    RejectTripled,
    /// The candidate closing delimiter is rejected when the delimiter's first
    /// byte also appears directly before or directly after it.
    RejectDoubledClose,
}
120
121struct InlineDelimiter {
122 delim: &'static str,
123 guard: DelimiterGuard,
124}
125
126impl InlineDelimiter {
127 fn open_rejected(&self, after_open: &str) -> bool {
129 match self.guard {
130 DelimiterGuard::RejectTripled => after_open.starts_with(&self.delim[..1]),
131 _ => false,
132 }
133 }
134
135 fn close_rejected(&self, after_open: &str, close_pos: usize) -> bool {
137 match self.guard {
138 DelimiterGuard::RejectDoubledClose => {
139 let dc = self.delim.as_bytes()[0];
140 let len = self.delim.len();
141 after_open.as_bytes().get(close_pos + len) == Some(&dc)
142 || (close_pos > 0 && after_open.as_bytes().get(close_pos - 1) == Some(&dc))
143 }
144 _ => false,
145 }
146 }
147}
148
149const INLINE_DELIMITERS: &[InlineDelimiter] = &[
154 InlineDelimiter {
155 delim: "||",
156 guard: DelimiterGuard::None,
157 }, InlineDelimiter {
159 delim: "__",
160 guard: DelimiterGuard::RejectTripled,
161 }, InlineDelimiter {
163 delim: "*",
164 guard: DelimiterGuard::None,
165 }, InlineDelimiter {
167 delim: "_",
168 guard: DelimiterGuard::RejectDoubledClose,
169 }, InlineDelimiter {
171 delim: "~",
172 guard: DelimiterGuard::None,
173 }, ];
175
/// One parsed piece of the input, borrowing from the original text.
///
/// All payloads are `char`s or string slices, so the type is cheap to copy;
/// `Debug` and equality are derived so that parser results can be inspected
/// and compared in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Fragment<'a> {
    /// A special character that was already preceded by a backslash in the
    /// input; holds the character without its backslash.
    Escaped(char),
    /// Contents of a fenced code block, without the surrounding fences.
    CodeBlock(&'a str),
    /// Contents of an inline code span, without the surrounding backticks.
    InlineCode(&'a str),
    /// An inline link: the bracketed text and the parenthesised URL.
    Link { text: &'a str, url: &'a str },
    /// Text wrapped in a pair of identical inline formatting delimiters.
    Formatted {
        /// The delimiter that opened and closed the span.
        delim: &'static str,
        /// The text between the delimiters.
        content: &'a str,
    },
    /// Any other single character.
    Plain(char),
}
198
199impl Fragment<'_> {
200 fn render(&self, out: &mut String) {
201 match self {
202 Self::Escaped(c) => {
203 out.push('\\');
204 out.push(*c);
205 }
206 Self::CodeBlock(content) => {
207 out.push_str("```");
208 for c in content.chars() {
209 push_code_escaped(out, c);
210 }
211 out.push_str("```");
212 }
213 Self::InlineCode(content) => {
214 out.push('`');
215 for c in content.chars() {
216 push_code_escaped(out, c);
217 }
218 out.push('`');
219 }
220 Self::Link { text, url } => {
221 out.push('[');
222 out.push_str(&tg_escape(text));
223 out.push_str("](");
224 out.push_str(url);
225 out.push(')');
226 }
227 Self::Formatted { delim, content } => {
228 out.push_str(delim);
229 out.push_str(&tg_escape(content));
230 out.push_str(delim);
231 }
232 Self::Plain(c) => {
233 if is_tg_special(*c) {
234 out.push('\\');
235 }
236 out.push(*c);
237 }
238 }
239 }
240}
241
242fn try_escaped_char<'a>(input: &mut &'a str) -> Option<Fragment<'a>> {
248 let rest = *input;
249 let mut chars = rest.chars();
250 if chars.next()? != '\\' {
251 return None;
252 }
253 let next = chars.next().filter(|c| is_tg_special(*c))?;
254 *input = &rest[1 + next.len_utf8()..];
255 Some(Fragment::Escaped(next))
256}
257
258fn try_code_block<'a>(input: &mut &'a str) -> Option<Fragment<'a>> {
259 let rest = *input;
260 let after_opening = rest.strip_prefix("```")?;
261 let end = find_code_block_end(after_opening)?;
262 let content = &after_opening[..end - 3]; *input = &after_opening[end..];
264 Some(Fragment::CodeBlock(content))
265}
266
267fn try_inline_code<'a>(input: &mut &'a str) -> Option<Fragment<'a>> {
268 let rest = *input;
269 let after_backtick = rest.strip_prefix('`')?;
270 let close = after_backtick.find('`')?;
271 let content = &after_backtick[..close];
272 *input = &after_backtick[close + 1..];
273 Some(Fragment::InlineCode(content))
274}
275
276fn try_link<'a>(input: &mut &'a str) -> Option<Fragment<'a>> {
277 let rest = *input;
278 let after_bracket = rest.strip_prefix('[')?;
279
280 let bracket_close = find_closing(after_bracket, "]")?;
281 let after_text = after_bracket[bracket_close + 1..].strip_prefix('(')?;
282 let paren_close = after_text.find(')')?;
283
284 let text = &after_bracket[..bracket_close];
285 let url = &after_text[..paren_close];
286 *input = &after_text[paren_close + 1..];
287 Some(Fragment::Link { text, url })
288}
289
290fn try_formatting<'a>(input: &mut &'a str) -> Option<Fragment<'a>> {
291 let rest = *input;
292
293 for d in INLINE_DELIMITERS {
294 if !rest.starts_with(d.delim) {
295 continue;
296 }
297
298 let len = d.delim.len();
299 let after_open = &rest[len..];
300
301 if d.open_rejected(after_open) {
302 continue;
303 }
304
305 let Some(close) = find_closing(after_open, d.delim) else {
306 continue;
307 };
308
309 if d.close_rejected(after_open, close) {
310 continue;
311 }
312
313 let content = &after_open[..close];
314 *input = &after_open[close + len..];
315 return Some(Fragment::Formatted {
316 delim: d.delim,
317 content,
318 });
319 }
320
321 None
322}
323
324fn next_fragment<'a>(input: &mut &'a str) -> Fragment<'a> {
326 if let Some(f) = try_escaped_char(input) {
327 return f;
328 }
329 if let Some(f) = try_code_block(input) {
330 return f;
331 }
332 if let Some(f) = try_inline_code(input) {
333 return f;
334 }
335 if let Some(f) = try_link(input) {
336 return f;
337 }
338 if let Some(f) = try_formatting(input) {
339 return f;
340 }
341
342 let ch = input.chars().next().unwrap();
343 *input = &input[ch.len_utf8()..];
344 Fragment::Plain(ch)
345}
346
347pub fn tg_escape(text: &str) -> String {
359 let mut out = String::with_capacity(text.len());
360 let mut input = text;
361
362 while !input.is_empty() {
363 next_fragment(&mut input).render(&mut out);
364 }
365
366 out
367}
368
// Python bindings, compiled only when the `python` feature is enabled.
#[cfg(feature = "python")]
mod python {
    use pyo3::prelude::*;

    // Thin wrapper exposing the crate-level `tg_escape` to Python.
    #[pyfunction]
    fn tg_escape(text: &str) -> String {
        super::tg_escape(text)
    }

    // Extension module registered under the name `_core`; it exports the
    // single function `tg_escape`.
    #[pymodule]
    #[pyo3(name = "_core")]
    fn telegram_escape_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
        let escape_fn = wrap_pyfunction!(tg_escape, m)?;
        m.add_function(escape_fn)
    }
}
391
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Asserts that escaping `input` yields exactly `expected`.
    /// `#[track_caller]` keeps failure locations pointing at the calling test.
    #[track_caller]
    fn check(input: &str, expected: &str) {
        assert_eq!(tg_escape(input), expected);
    }

    /// Asserts that `input` passes through the escaper unchanged.
    #[track_caller]
    fn check_unchanged(input: &str) {
        check(input, input);
    }

    // ----- plain text and reserved characters -----

    #[test]
    fn test_md_escape() {
        check(
            "Soon you'll get a stats for today, and the overall status can be viewed by the /get_stat command :)",
            r#"Soon you'll get a stats for today, and the overall status can be viewed by the /get\_stat command :\)"#,
        );
    }

    #[test]
    fn test_escape_outside_code_all_specials() {
        check(
            r#"a_*~`>#+-=|{}.!\x"#,
            r"a\_\*\~\`\>\#\+\-\=\|\{\}\.\!\\x",
        );
    }

    // ----- code spans and blocks -----

    #[test]
    fn test_inline_code_escapes_only_backtick_and_backslash() {
        check(
            r#"Before `a_*~>#+-=|{}.!\` after"#,
            r#"Before `a_*~>#+-=|{}.!\\` after"#,
        );
    }

    #[test]
    fn test_code_block_escapes_only_backtick_and_backslash() {
        check(
            "```\na_*[]()~`>#+-=|{}.!\\\n```",
            "```\na_*[]()~\\`>#+-=|{}.!\\\\\n```",
        );
    }

    #[test]
    fn test_mixed_multiple_inline_code_segments() {
        check(
            r#"pre_* `codeA_*` mid_* `codeB_\` post_*"#,
            r#"pre_\* `codeA_*` mid_* `codeB_\\` post\_*"#,
        );
    }

    #[test]
    fn test_emphasis_around_text_with_inline_code() {
        check(
            r#"*start* `inside_*` end_*"#,
            r#"*start* `inside_*` end\_\*"#,
        );
    }

    // ----- existing escapes and arithmetic-looking text -----

    #[test]
    fn test_escaped_characters() {
        check(
            r"Escaped characters: \\ \* \_ \[ \] \( \) \~",
            r"Escaped characters: \\ \* \_ \[ \] \( \) \~",
        );
    }

    #[test]
    fn test_math_expressions() {
        check(
            r"Mathematical expressions: 2 + 2 = 4, x > y, a <= b",
            r"Mathematical expressions: 2 \+ 2 \= 4, x \> y, a <\= b",
        );
    }

    // ----- formatting that must survive untouched -----

    #[test]
    fn test_bold_preserved() {
        check_unchanged("*bold*");
    }

    #[test]
    fn test_italic_preserved() {
        check_unchanged("_italic_");
    }

    #[test]
    fn test_underline_preserved() {
        check_unchanged("__underline__");
    }

    #[test]
    fn test_strikethrough_preserved() {
        check_unchanged("~strikethrough~");
    }

    #[test]
    fn test_spoiler_preserved() {
        check_unchanged("||spoiler||");
    }

    #[test]
    fn test_link_preserved() {
        check(
            "[Click here](https://example.com)",
            "[Click here](https://example.com)",
        );
    }

    #[test]
    fn test_link_text_escaped() {
        check(
            "[click + go](https://example.com)",
            r"[click \+ go](https://example.com)",
        );
    }

    #[test]
    fn test_nested_formatting() {
        check("*bold _italic_ bold*", "*bold _italic_ bold*");
    }

    #[test]
    fn test_bold_with_special_chars() {
        check("hello *world*!", r"hello *world*\!");
    }

    #[test]
    fn test_mixed_formatting_and_plain() {
        check(
            "hello *world* and _stuff_!",
            r"hello *world* and _stuff_\!",
        );
    }

    #[test]
    fn test_code_block_with_language() {
        check(
            "```rust\nfn main() {}\n```",
            "```rust\nfn main() {}\n```",
        );
    }

    // ----- edge cases -----

    #[test]
    fn test_empty_string() {
        check("", "");
    }

    #[test]
    fn test_no_special_chars() {
        check_unchanged("hello world");
    }

    #[test]
    fn test_unmatched_bold() {
        check("price is 5*3", r"price is 5\*3");
    }

    #[test]
    fn test_unmatched_italic() {
        check("file_name", r"file\_name");
    }

    #[test]
    fn test_unmatched_backtick() {
        check("it's a `test", r"it's a \`test");
    }

    #[test]
    fn test_adjacent_formatting() {
        check_unchanged("*bold*_italic_");
    }

    #[test]
    fn test_formatting_with_special_inside() {
        check("*2+2=4*", r"*2\+2\=4*");
    }

    #[test]
    fn test_multiple_newlines() {
        check_unchanged("a\n\nb");
    }

    #[test]
    fn test_non_special_chars_pass_through() {
        check("a < b @ c / d : e ; f", "a < b @ c / d : e ; f");
    }

    #[test]
    fn test_code_block_with_backticks_inside() {
        check(
            "```\nsome `code` here\n```",
            "```\nsome \\`code\\` here\n```",
        );
    }

    #[test]
    fn test_link_with_formatted_text() {
        check(
            "[*bold link*](https://example.com)",
            "[*bold link*](https://example.com)",
        );
    }

    #[test]
    fn test_unmatched_bracket_not_link() {
        check("[not a link", r"\[not a link");
    }

    #[test]
    fn test_bracket_without_paren() {
        check("[text] no url", r"\[text\] no url");
    }

    #[test]
    fn test_spoiler_with_special_inside() {
        check("||secret!||", r"||secret\!||");
    }

    #[test]
    fn test_underline_vs_italic() {
        check("__underline__", "__underline__");
        check("_italic_", "_italic_");
    }

    #[test]
    fn test_escaped_delimiter_not_matched() {
        check(r"\*not bold\*", r"\*not bold\*");
    }

    #[test]
    fn test_backslash_before_non_special() {
        check(r"\n", r"\\n");
    }

    #[test]
    fn test_consecutive_specials() {
        check("()[]{}", r"\(\)\[\]\{\}");
    }

    // ----- multi-byte text -----

    #[test]
    fn test_cyrillic_text() {
        check("НОВЫЙ", "НОВЫЙ");
        check("Привет мир", "Привет мир");
        check("Привет *мир*!", r"Привет *мир*\!");
    }

    #[test]
    fn test_multibyte_in_code() {
        check("`код`", "`код`");
        check("```\nкод\n```", "```\nкод\n```");
    }

    // ----- table invariants -----

    /// No delimiter may be listed ahead of a longer delimiter that starts
    /// with it; the shorter one would otherwise shadow the longer one.
    #[test]
    fn test_delimiter_ordering_invariant() {
        for (idx, earlier) in INLINE_DELIMITERS.iter().enumerate() {
            for later in INLINE_DELIMITERS.iter().skip(idx + 1) {
                assert!(
                    !later.delim.starts_with(earlier.delim),
                    "'{0}' is a prefix of '{1}' but comes before it — \
                     multi-char delimiters must precede their subsets",
                    earlier.delim,
                    later.delim,
                );
            }
        }
    }
}