1use std::{borrow::Cow, sync::LazyLock};
2
3use pulldown_cmark::{Event, Options as DeOptions, Parser, Tag, TagEnd};
4use pulldown_cmark_to_cmark::Options as SerOptions;
5use regex::Regex;
6
/// Expands to a `LazyLock` whose initializer compiles the given pattern literal
/// with `regex::Regex::new`.
///
/// A single trailing comma after the literal is accepted. Compilation happens on
/// first access of the `LazyLock`; an invalid pattern makes that first access
/// panic through `unwrap`.
macro_rules! regex {
    ($pattern:literal $(,)?) => {
        LazyLock::new(|| regex::Regex::new($pattern).unwrap())
    };
}
12
/// Character class used by `tg_escape` for text outside of code: matches any one
/// of underscore, asterisk, square brackets, parentheses, tilde, backtick, `>`,
/// `#`, `+`, `-`, `=`, `|`, curly braces, `.`, `!` and the backslash itself.
static TG_MD_ESCAPE_REGEX: LazyLock<Regex> = regex!(r"[_*\[\]()~`>#+\-=|{}\.!\\]");
/// Character class used by `tg_escape` for code-block text and inline code:
/// matches only a backtick or a backslash.
static TG_MD_CODE_ESCAPE_REGEX: LazyLock<Regex> = regex!(r"[`\\]");
/// Options handed to the Markdown serializer: the code-block token count is set
/// to 3 and every other field keeps its `Default` value.
static TG_MD_SERIALIZE_OPTIONS: LazyLock<SerOptions> = LazyLock::new(|| SerOptions {
    code_block_token_count: 3,
    ..Default::default()
});
19pub fn tg_escape(text: &str) -> String {
24 let mut options = DeOptions::empty();
25 options.insert(DeOptions::ENABLE_STRIKETHROUGH);
26
27 let mut inside_code = false;
28
29 let parser = Parser::new_ext(text, options).map(|event| {
30 match &event {
31 Event::Start(Tag::CodeBlock(_)) => {
32 inside_code = true;
33
34 event
35 }
36 Event::End(TagEnd::CodeBlock) => {
37 inside_code = false;
38
39 event
40 }
41 Event::Text(text) | Event::Code(text) => {
42 if text.len() == 1 {
43 return event;
45 }
46
47 let re = if inside_code || matches!(&event, Event::Code(_)) {
48 &TG_MD_CODE_ESCAPE_REGEX
49 } else {
50 &TG_MD_ESCAPE_REGEX
51 };
52
53 let replaced = re.replace_all(text, r"\$0");
55
56 match replaced {
57 Cow::Borrowed(_) => event,
58 Cow::Owned(text) => match event {
59 Event::Text(_) => Event::Text(text.into()),
60 Event::Code(_) => Event::Code(text.into()),
61 _ => unreachable!(),
62 },
63 }
64 }
65 _ => event,
66 }
67 });
68
69 let mut res = String::with_capacity(text.len());
70
71 pulldown_cmark_to_cmark::cmark_with_options(parser, &mut res, TG_MD_SERIALIZE_OPTIONS.clone())
72 .expect("writing to string failed!");
73
74 res
75}
76
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Runs `tg_escape` on `input` and compares the output with `expected`.
    ///
    /// `#[track_caller]` keeps the reported failure location in the calling test.
    #[track_caller]
    fn assert_escaped(input: &str, expected: &str) {
        assert_eq!(tg_escape(input), expected);
    }

    /// Plain sentence containing an underscore and a closing parenthesis.
    #[test]
    fn test_md_escape() {
        assert_escaped(
            "Soon you'll get a stats for today, and the overall status can be viewed by the /get_stat command :)",
            r#"Soon you'll get a stats for today, and the overall status can be viewed by the /get\_stat command :\)"#,
        );
    }

    /// A run of special characters outside of any code span.
    #[test]
    fn test_escape_outside_code_all_specials() {
        assert_escaped(
            r#"a_*~`>#+-=|{}.!\x"#,
            r"a\_\*\~\`\\>\#\+\-\=\|\{\}\.\!\\x",
        );
    }

    /// Inside inline code only the trailing backslash is expected to change.
    #[test]
    fn test_inline_code_escapes_only_backtick_and_backslash() {
        assert_escaped(
            r#"Before `a_*~>#+-=|{}.!\` after"#,
            r#"Before `a_*~>#+-=|{}.!\\` after"#,
        );
    }

    /// Inside a fenced code block only the backtick and backslash are expected to change.
    #[test]
    fn test_code_block_escapes_only_backtick_and_backslash() {
        assert_escaped(
            r#"```
a_*[]()~`>#+-=|{}.!\
```"#,
            r#"
```
a_*[]()~\`>#+-=|{}.!\\
```"#,
        );
    }

    /// Ordinary text interleaved with two inline code spans.
    #[test]
    fn test_mixed_multiple_inline_code_segments() {
        assert_escaped(
            r#"pre_* `codeA_*` mid_* `codeB_\` post_*"#,
            r#"pre\_\* `codeA_*` mid\_\* `codeB_\\` post\_\*"#,
        );
    }

    /// Emphasis markup followed by inline code and trailing special characters.
    #[test]
    fn test_emphasis_around_text_with_inline_code() {
        assert_escaped(
            r#"*start* `inside_*` end_*"#,
            r#"*start* `inside_*` end\_\*"#,
        );
    }

    /// Input that already contains backslash escapes; currently ignored.
    #[test]
    #[ignore = "this test is failing"]
    fn test_escaped_characters() {
        assert_escaped(
            r"Escaped characters: \\ \* \_ \[ \] \( \) \~",
            r"Escaped characters: \\\\ \\\* \\\_ \\\[ \\\] \\\( \\\) \\\~",
        );
    }

    /// Arithmetic and comparison operators in plain text; currently ignored.
    #[test]
    #[ignore = "this test is failing"]
    fn test_math_expressions() {
        assert_escaped(
            r"Mathematical expressions: 2 + 2 = 4, x > y, a <= b",
            r"Mathematical expressions: 2 \+ 2 \= 4, x \> y, a \<\= b",
        );
    }
}