use ferogram_tl_types as tl;
/// Parses legacy (V1-style) Markdown into plain text plus message entities.
///
/// This is a thin public wrapper that forwards `text` unchanged to
/// `parse_markdown_v1_impl` and returns its `(plain_text, entities)` result.
#[deprecated(
    since = "0.3.9",
    note = "Telegram considers MarkdownV1 legacy. Use `parse_markdown` (V2) for new code."
)]
pub fn parse_markdown_v1(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    parse_markdown_v1_impl(text)
}
/// Implementation of the legacy Markdown parser behind `parse_markdown_v1`.
///
/// Scans `text` once, left to right, and returns the text with all markup
/// removed together with the entities describing the formatting. Entity
/// offsets and lengths are counted in UTF-16 code units of the returned text.
///
/// Recognised syntax, in order of precedence at each position:
/// - `\x` where `x` is one of ``* _ ~ | [ ] ( ) ` \ !``: emits `x` literally;
/// - a triple-backtick fenced block: `Pre`; when the block contains a newline,
///   the text before the first newline (trimmed) becomes the language;
/// - a single-backtick span: `Code`;
/// - `![text](tg://emoji?id=N)`: `CustomEmoji`;
/// - `[text](url)`: whatever entity `parse_link_entity` returns;
/// - `**` / `*` bold, `__` / `_` italic, `~~` strike, `||` spoiler.
///
/// Formatting markers toggle: the first occurrence opens a tag, the next one
/// of the same kind closes it. Markers that are never closed are removed from
/// the output without producing an entity. Unterminated code spans and
/// malformed links fall through and are treated as ordinary characters.
///
/// Empty `Pre`, `Code` and `CustomEmoji` spans are dropped rather than
/// reported as zero-length entities, matching the `length > 0` rule that
/// `toggle_tag_v1` applies to the toggled tags.
fn parse_markdown_v1_impl(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    let mut out = String::with_capacity(text.len());
    let mut ents = Vec::new();
    let chars: Vec<char> = text.chars().collect();
    let n = chars.len();
    let mut i = 0;
    // Tags opened but not yet closed, each with the UTF-16 offset it started at.
    let mut open_stack: Vec<(MarkdownTagV1, i32)> = Vec::new();
    // Length of `out` so far, in UTF-16 code units.
    let mut utf16_off: i32 = 0;

    // Appends one character to `out` and keeps `utf16_off` in sync.
    macro_rules! push_char {
        ($c:expr) => {{
            let c: char = $c;
            out.push(c);
            utf16_off += c.len_utf16() as i32;
        }};
    }

    while i < n {
        // Backslash escape of a markup character.
        if chars[i] == '\\' && i + 1 < n {
            let next = chars[i + 1];
            if matches!(
                next,
                '*' | '_' | '~' | '|' | '[' | ']' | '(' | ')' | '`' | '\\' | '!'
            ) {
                push_char!(next);
                i += 2;
                continue;
            }
        }

        // Fenced block delimited by triple backticks.
        if i + 2 < n && chars[i] == '`' && chars[i + 1] == '`' && chars[i + 2] == '`' {
            let start = i + 3;
            let mut j = start;
            while j + 2 < n {
                if chars[j] == '`' && chars[j + 1] == '`' && chars[j + 2] == '`' {
                    break;
                }
                j += 1;
            }
            // `j + 2 < n` only holds when the loop stopped on a closing fence.
            if j + 2 < n {
                let block: String = chars[start..j].iter().collect();
                let (lang, code) = if let Some(nl) = block.find('\n') {
                    (
                        block[..nl].trim().to_string(),
                        block[nl + 1..].trim_end_matches('\n').to_string(),
                    )
                } else {
                    (String::new(), block)
                };
                let code_off = utf16_off;
                let code_utf16: i32 = code.encode_utf16().count() as i32;
                // An empty block carries no text to format; skip the entity.
                if code_utf16 > 0 {
                    ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
                        offset: code_off,
                        length: code_utf16,
                        language: lang,
                    }));
                }
                for c in code.chars() {
                    push_char!(c);
                }
                i = j + 3;
                continue;
            }
        }

        // Inline code delimited by single backticks.
        if chars[i] == '`' {
            let start = i + 1;
            let mut j = start;
            while j < n && chars[j] != '`' {
                j += 1;
            }
            if j < n {
                let code: String = chars[start..j].iter().collect();
                let code_off = utf16_off;
                let code_utf16: i32 = code.encode_utf16().count() as i32;
                // Two adjacent backticks enclose nothing; skip the entity.
                if code_utf16 > 0 {
                    ents.push(tl::enums::MessageEntity::Code(
                        tl::types::MessageEntityCode {
                            offset: code_off,
                            length: code_utf16,
                        },
                    ));
                }
                for c in code.chars() {
                    push_char!(c);
                }
                i = j + 1;
                continue;
            }
        }

        // Custom emoji: `![alt](tg://emoji?id=N)`.
        if chars[i] == '!'
            && i + 1 < n
            && chars[i + 1] == '['
            && let Some((end_i, doc_id, inner_text)) = parse_emoji_link(&chars, i)
        {
            let ent_off = utf16_off;
            for c in inner_text.chars() {
                push_char!(c);
            }
            let ent_len = utf16_off - ent_off;
            // Empty alt text gives the entity nothing to cover; skip it.
            if ent_len > 0 {
                ents.push(tl::enums::MessageEntity::CustomEmoji(
                    tl::types::MessageEntityCustomEmoji {
                        offset: ent_off,
                        length: ent_len,
                        document_id: doc_id,
                    },
                ));
            }
            i = end_i;
            continue;
        }

        // Inline link or mention; the helper appends the link text itself.
        if chars[i] == '['
            && let Some((end_i, ent)) =
                parse_link_entity(&chars, i, utf16_off, &mut out, &mut utf16_off)
        {
            ents.push(ent);
            i = end_i;
            continue;
        }

        // Two-character markers take precedence over one-character ones.
        if i + 1 < n {
            let tag = match [chars[i], chars[i + 1]] {
                ['*', '*'] => Some(MarkdownTagV1::Bold),
                ['_', '_'] => Some(MarkdownTagV1::Italic),
                ['~', '~'] => Some(MarkdownTagV1::Strike),
                ['|', '|'] => Some(MarkdownTagV1::Spoiler),
                _ => None,
            };
            if let Some(tag) = tag {
                toggle_tag_v1(&mut open_stack, &mut ents, tag, utf16_off);
                i += 2;
                continue;
            }
        }

        let one_tag = match chars[i] {
            '*' => Some(MarkdownTagV1::Bold),
            '_' => Some(MarkdownTagV1::Italic),
            _ => None,
        };
        if let Some(tag) = one_tag {
            toggle_tag_v1(&mut open_stack, &mut ents, tag, utf16_off);
            i += 1;
            continue;
        }

        // Ordinary character.
        push_char!(chars[i]);
        i += 1;
    }
    (out, ents)
}
/// Opens or closes `tag` at the current output position.
///
/// If `tag` is not on `stack`, it is pushed together with `utf16_off` as its
/// start. Otherwise the most recently opened occurrence is removed and, when
/// the covered span is non-empty, the matching entity is appended to `ents`.
fn toggle_tag_v1(
    stack: &mut Vec<(MarkdownTagV1, i32)>,
    ents: &mut Vec<tl::enums::MessageEntity>,
    tag: MarkdownTagV1,
    utf16_off: i32,
) {
    // Search from the top of the stack so the innermost open tag is closed.
    let Some(idx) = stack.iter().rposition(|&(open, _)| open == tag) else {
        stack.push((tag, utf16_off));
        return;
    };

    let (_, begin) = stack.remove(idx);
    let span = utf16_off - begin;
    if span > 0 {
        ents.push(make_entity_v1(tag, begin, span));
    }
}
/// Toggle-style formatting tags understood by the legacy (V1) parser.
///
/// Values are stored on the parser's open-tag stack and compared by equality
/// to find the tag a closing marker belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MarkdownTagV1 {
    /// Written as `*` or `**`.
    Bold,
    /// Written as `_` or `__`.
    Italic,
    /// Written as `~~`.
    Strike,
    /// Written as `||`.
    Spoiler,
}
/// Builds the message entity for a V1 `tag` covering `length` units starting
/// at `offset`.
fn make_entity_v1(tag: MarkdownTagV1, offset: i32, length: i32) -> tl::enums::MessageEntity {
    use tl::enums::MessageEntity as Entity;
    use tl::types::{
        MessageEntityBold, MessageEntityItalic, MessageEntitySpoiler, MessageEntityStrike,
    };

    match tag {
        MarkdownTagV1::Bold => Entity::Bold(MessageEntityBold { offset, length }),
        MarkdownTagV1::Italic => Entity::Italic(MessageEntityItalic { offset, length }),
        MarkdownTagV1::Strike => Entity::Strike(MessageEntityStrike { offset, length }),
        MarkdownTagV1::Spoiler => Entity::Spoiler(MessageEntitySpoiler { offset, length }),
    }
}
/// Parses MarkdownV2-style text into plain text plus message entities.
///
/// Scans `text` once, left to right, and returns the text with all markup
/// removed together with the entities describing the formatting. Entity
/// offsets and lengths are counted in UTF-16 code units of the returned text.
///
/// Recognised syntax:
/// - at the start of a line, `>` opens or continues a blockquote and `**>`
///   a collapsed one; one following space is swallowed. Consecutive quoted
///   lines of the same kind form a single `Blockquote` entity;
/// - `\x` where `is_v2_escapable(x)`: emits `x` literally;
/// - a triple-backtick fenced block: `Pre`; when the block contains a newline,
///   the text before the first newline (trimmed) becomes the language;
/// - a single-backtick span: `Code`;
/// - `![text](tg://emoji?id=N)`: `CustomEmoji`;
/// - `[text](url)`: whatever entity `parse_link_entity` returns;
/// - `**` / `*` bold, `__` underline, `_` italic, `~` strike, `||` spoiler.
///
/// Formatting markers toggle: the first occurrence opens a tag, the next one
/// of the same kind closes it. Markers that are never closed are removed from
/// the output without producing an entity. Unterminated code spans and
/// malformed links fall through and are treated as ordinary characters.
///
/// Empty `Pre`, `Code` and `CustomEmoji` spans are dropped rather than
/// reported as zero-length entities, matching the `length > 0` rule already
/// applied to blockquotes and toggled tags.
pub fn parse_markdown_v2(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    let mut out = String::with_capacity(text.len());
    let mut ents = Vec::new();
    let chars: Vec<char> = text.chars().collect();
    let n = chars.len();
    let mut i = 0;
    // Tags opened but not yet closed, each with the UTF-16 offset it started at.
    let mut open_stack: Vec<(MarkdownTagV2, i32)> = Vec::new();
    // Length of `out` so far, in UTF-16 code units.
    let mut utf16_off: i32 = 0;
    // Start offset and kind of the blockquote currently being collected, if any.
    let mut bq_start_off: Option<i32> = None;
    let mut bq_collapsed = false;
    let mut at_line_start = true;

    // Appends one character to `out` and keeps `utf16_off` in sync.
    macro_rules! push_char {
        ($c:expr) => {{
            let c: char = $c;
            out.push(c);
            utf16_off += c.len_utf16() as i32;
        }};
    }

    while i < n {
        if at_line_start {
            let is_exp =
                i + 2 < n && chars[i] == '*' && chars[i + 1] == '*' && chars[i + 2] == '>';
            let is_bq = !is_exp && chars[i] == '>';
            if is_exp || is_bq {
                let collapsed = is_exp;
                match bq_start_off {
                    None => {
                        bq_start_off = Some(utf16_off);
                        bq_collapsed = collapsed;
                    }
                    // The quote kind changed: finish the running quote and
                    // start a new one at the current position.
                    Some(start_off) if bq_collapsed != collapsed => {
                        let length = utf16_off - start_off;
                        if length > 0 {
                            ents.push(make_blockquote(start_off, length, bq_collapsed));
                        }
                        bq_start_off = Some(utf16_off);
                        bq_collapsed = collapsed;
                    }
                    // Same kind as the running quote: just continue it.
                    _ => {}
                }
                i += if is_exp { 3 } else { 1 };
                if i < n && chars[i] == ' ' {
                    i += 1;
                }
                at_line_start = false;
                continue;
            } else if let Some(start_off) = bq_start_off.take() {
                // An unquoted line ends the running blockquote.
                let length = utf16_off - start_off;
                if length > 0 {
                    ents.push(make_blockquote(start_off, length, bq_collapsed));
                }
            }
            at_line_start = false;
        }

        // Backslash escape of a markup character.
        if chars[i] == '\\' && i + 1 < n && is_v2_escapable(chars[i + 1]) {
            push_char!(chars[i + 1]);
            i += 2;
            continue;
        }

        // Fenced block delimited by triple backticks.
        if i + 2 < n && chars[i] == '`' && chars[i + 1] == '`' && chars[i + 2] == '`' {
            let start = i + 3;
            let mut j = start;
            while j + 2 < n {
                if chars[j] == '`' && chars[j + 1] == '`' && chars[j + 2] == '`' {
                    break;
                }
                j += 1;
            }
            // `j + 2 < n` only holds when the loop stopped on a closing fence.
            if j + 2 < n {
                let block: String = chars[start..j].iter().collect();
                let (lang, code) = if let Some(nl) = block.find('\n') {
                    (
                        block[..nl].trim().to_string(),
                        block[nl + 1..].trim_end_matches('\n').to_string(),
                    )
                } else {
                    (String::new(), block)
                };
                let code_off = utf16_off;
                let code_utf16: i32 = code.encode_utf16().count() as i32;
                // An empty block carries no text to format; skip the entity.
                if code_utf16 > 0 {
                    ents.push(tl::enums::MessageEntity::Pre(tl::types::MessageEntityPre {
                        offset: code_off,
                        length: code_utf16,
                        language: lang,
                    }));
                }
                for c in code.chars() {
                    push_char!(c);
                }
                i = j + 3;
                continue;
            }
        }

        // Inline code delimited by single backticks.
        if chars[i] == '`' {
            let start = i + 1;
            let mut j = start;
            while j < n && chars[j] != '`' {
                j += 1;
            }
            if j < n {
                let code: String = chars[start..j].iter().collect();
                let code_off = utf16_off;
                let code_utf16: i32 = code.encode_utf16().count() as i32;
                // Two adjacent backticks enclose nothing; skip the entity.
                if code_utf16 > 0 {
                    ents.push(tl::enums::MessageEntity::Code(
                        tl::types::MessageEntityCode {
                            offset: code_off,
                            length: code_utf16,
                        },
                    ));
                }
                for c in code.chars() {
                    push_char!(c);
                }
                i = j + 1;
                continue;
            }
        }

        // Custom emoji: `![alt](tg://emoji?id=N)`.
        if chars[i] == '!'
            && i + 1 < n
            && chars[i + 1] == '['
            && let Some((end_i, doc_id, inner_text)) = parse_emoji_link(&chars, i)
        {
            let ent_off = utf16_off;
            for c in inner_text.chars() {
                push_char!(c);
            }
            let ent_len = utf16_off - ent_off;
            // Empty alt text gives the entity nothing to cover; skip it.
            if ent_len > 0 {
                ents.push(tl::enums::MessageEntity::CustomEmoji(
                    tl::types::MessageEntityCustomEmoji {
                        offset: ent_off,
                        length: ent_len,
                        document_id: doc_id,
                    },
                ));
            }
            i = end_i;
            continue;
        }

        // Inline link or mention; the helper appends the link text itself.
        if chars[i] == '['
            && let Some((end_i, ent)) =
                parse_link_entity(&chars, i, utf16_off, &mut out, &mut utf16_off)
        {
            ents.push(ent);
            i = end_i;
            continue;
        }

        // Two-character markers take precedence over one-character ones.
        if i + 1 < n {
            let tag = match [chars[i], chars[i + 1]] {
                ['*', '*'] => Some(MarkdownTagV2::Bold),
                ['_', '_'] => Some(MarkdownTagV2::Underline),
                ['|', '|'] => Some(MarkdownTagV2::Spoiler),
                _ => None,
            };
            if let Some(tag) = tag {
                toggle_tag_v2(&mut open_stack, &mut ents, tag, utf16_off);
                i += 2;
                continue;
            }
        }

        let one_tag = match chars[i] {
            '*' => Some(MarkdownTagV2::Bold),
            '_' => Some(MarkdownTagV2::Italic),
            '~' => Some(MarkdownTagV2::Strike),
            _ => None,
        };
        if let Some(tag) = one_tag {
            toggle_tag_v2(&mut open_stack, &mut ents, tag, utf16_off);
            i += 1;
            continue;
        }

        // A newline re-arms blockquote detection for the next line.
        if chars[i] == '\n' {
            push_char!('\n');
            at_line_start = true;
            i += 1;
            continue;
        }

        // Ordinary character.
        push_char!(chars[i]);
        i += 1;
    }

    // A blockquote still running at end of input extends to the end of `out`.
    if let Some(start_off) = bq_start_off.take() {
        let length = utf16_off - start_off;
        if length > 0 {
            ents.push(make_blockquote(start_off, length, bq_collapsed));
        }
    }
    (out, ents)
}
/// Opens or closes `tag` at the current output position.
///
/// If `tag` is not on `stack`, it is pushed together with `utf16_off` as its
/// start. Otherwise the most recently opened occurrence is removed and, when
/// the covered span is non-empty, the matching entity is appended to `ents`.
fn toggle_tag_v2(
    stack: &mut Vec<(MarkdownTagV2, i32)>,
    ents: &mut Vec<tl::enums::MessageEntity>,
    tag: MarkdownTagV2,
    utf16_off: i32,
) {
    // Search from the top of the stack so the innermost open tag is closed.
    let Some(idx) = stack.iter().rposition(|&(open, _)| open == tag) else {
        stack.push((tag, utf16_off));
        return;
    };

    let (_, begin) = stack.remove(idx);
    let span = utf16_off - begin;
    if span > 0 {
        ents.push(make_entity_v2(tag, begin, span));
    }
}
/// Returns `true` when `c` is one of the characters the V2 parser accepts
/// after a backslash and the V2 generator prefixes with a backslash.
fn is_v2_escapable(c: char) -> bool {
    // Every character that takes part in V2 escaping, listed once.
    const ESCAPABLE: &str = "_*[]()~\\`>#+-=|{}.!";
    ESCAPABLE.contains(c)
}
/// Builds a `Blockquote` entity covering `length` units starting at `offset`;
/// `collapsed` is stored on the entity as given.
fn make_blockquote(offset: i32, length: i32, collapsed: bool) -> tl::enums::MessageEntity {
    let quote = tl::types::MessageEntityBlockquote {
        collapsed,
        offset,
        length,
    };
    tl::enums::MessageEntity::Blockquote(quote)
}
/// Toggle-style formatting tags understood by the V2 parser.
///
/// Values are stored on the parser's open-tag stack and compared by equality
/// to find the tag a closing marker belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MarkdownTagV2 {
    /// Written as `*` or `**`.
    Bold,
    /// Written as `_`.
    Italic,
    /// Written as `__`.
    Underline,
    /// Written as `~`.
    Strike,
    /// Written as `||`.
    Spoiler,
}
/// Builds the message entity for a V2 `tag` covering `length` units starting
/// at `offset`.
fn make_entity_v2(tag: MarkdownTagV2, offset: i32, length: i32) -> tl::enums::MessageEntity {
    use tl::enums::MessageEntity as Entity;
    use tl::types::{
        MessageEntityBold, MessageEntityItalic, MessageEntitySpoiler, MessageEntityStrike,
        MessageEntityUnderline,
    };

    match tag {
        MarkdownTagV2::Bold => Entity::Bold(MessageEntityBold { offset, length }),
        MarkdownTagV2::Italic => Entity::Italic(MessageEntityItalic { offset, length }),
        MarkdownTagV2::Underline => Entity::Underline(MessageEntityUnderline { offset, length }),
        MarkdownTagV2::Strike => Entity::Strike(MessageEntityStrike { offset, length }),
        MarkdownTagV2::Spoiler => Entity::Spoiler(MessageEntitySpoiler { offset, length }),
    }
}
/// Parses Markdown text into plain text plus message entities.
///
/// Forwards `text` unchanged to `parse_markdown_v2` and returns its result.
pub fn parse_markdown(text: &str) -> (String, Vec<tl::enums::MessageEntity>) {
    parse_markdown_v2(text)
}
pub fn generate_markdown_v2(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
use tl::enums::MessageEntity as ME;
struct BqRange {
offset: i32,
end: i32,
collapsed: bool,
}
let mut bq_ranges: Vec<BqRange> = Vec::new();
let mut pre_ranges: Vec<(i32, i32)> = Vec::new();
let mut code_ranges: Vec<(i32, i32)> = Vec::new();
let mut insertions: Vec<(i32, bool, String)> = Vec::new();
for ent in entities {
match ent {
ME::Bold(e) => {
insertions.push((e.offset, true, "*".into()));
insertions.push((e.offset + e.length, false, "*".into()));
}
ME::Italic(e) => {
insertions.push((e.offset, true, "_".into()));
insertions.push((e.offset + e.length, false, "_".into()));
}
ME::Underline(e) => {
insertions.push((e.offset, true, "__".into()));
insertions.push((e.offset + e.length, false, "__".into()));
}
ME::Strike(e) => {
insertions.push((e.offset, true, "~".into()));
insertions.push((e.offset + e.length, false, "~".into()));
}
ME::Spoiler(e) => {
insertions.push((e.offset, true, "||".into()));
insertions.push((e.offset + e.length, false, "||".into()));
}
ME::Code(e) => {
insertions.push((e.offset, true, "`".into()));
insertions.push((e.offset + e.length, false, "`".into()));
code_ranges.push((e.offset, e.offset + e.length));
}
ME::Pre(e) => {
let lang = e.language.trim();
insertions.push((e.offset, true, format!("```{lang}\n")));
insertions.push((e.offset + e.length, false, "\n```".into()));
pre_ranges.push((e.offset, e.offset + e.length));
}
ME::TextUrl(e) => {
insertions.push((e.offset, true, "[".into()));
insertions.push((e.offset + e.length, false, format!("]({})", e.url)));
}
ME::MentionName(e) => {
insertions.push((e.offset, true, "[".into()));
insertions.push((
e.offset + e.length,
false,
format!("](tg://user?id={})", e.user_id),
));
}
ME::CustomEmoji(e) => {
insertions.push((e.offset, true, "", e.document_id),
));
}
ME::Blockquote(e) => {
bq_ranges.push(BqRange {
offset: e.offset,
end: e.offset + e.length,
collapsed: e.collapsed,
});
}
_ => {}
}
}
insertions.sort_by(|(a_pos, a_open, _), (b_pos, b_open, _)| {
a_pos.cmp(b_pos).then_with(|| b_open.cmp(a_open))
});
let mut result = String::with_capacity(text.len() + 64);
let mut ins_idx = 0;
let mut utf16_pos: i32 = 0;
let mut at_line_start = true;
for ch in text.chars() {
while ins_idx < insertions.len() && insertions[ins_idx].0 <= utf16_pos {
result.push_str(&insertions[ins_idx].2);
ins_idx += 1;
}
if at_line_start
&& let Some(bq) = bq_ranges
.iter()
.find(|b| utf16_pos >= b.offset && utf16_pos < b.end)
{
if bq.collapsed {
result.push_str("**>");
} else {
result.push('>');
}
result.push(' ');
}
let in_verbatim = pre_ranges
.iter()
.any(|(s, e)| utf16_pos >= *s && utf16_pos < *e)
|| code_ranges
.iter()
.any(|(s, e)| utf16_pos >= *s && utf16_pos < *e);
if !in_verbatim && is_v2_escapable(ch) {
result.push('\\');
}
result.push(ch);
utf16_pos += ch.len_utf16() as i32;
at_line_start = ch == '\n';
}
while ins_idx < insertions.len() {
result.push_str(&insertions[ins_idx].2);
ins_idx += 1;
}
result
}
/// Renders plain `text` plus its `entities` as Markdown markup.
///
/// Forwards both arguments unchanged to `generate_markdown_v2` and returns
/// its result.
pub fn generate_markdown(text: &str, entities: &[tl::enums::MessageEntity]) -> String {
    generate_markdown_v2(text, entities)
}
/// Tries to read a custom-emoji link of the form `![text](tg://emoji?id=N)`.
///
/// `start` is the index of the `!`; the caller has already checked that it is
/// followed by `[`. On success returns the index just past the closing `)`,
/// the parsed document id and the text between the brackets.
///
/// Returns `None` when there is no `]`, when `]` is not directly followed by
/// `(`, when there is no closing `)`, or when the target is not
/// `tg://emoji?id=` followed by a valid `i64`.
fn parse_emoji_link(chars: &[char], start: usize) -> Option<(usize, i64, String)> {
    const EMOJI_PFX: &str = "tg://emoji?id=";

    // Text between `![` and the first `]`.
    let label_from = start + 2;
    let label_to = label_from + chars.get(label_from..)?.iter().position(|&c| c == ']')?;

    // The target must follow immediately, as `](`.
    if chars.get(label_to + 1) != Some(&'(') {
        return None;
    }

    // Target between `(` and the first `)`.
    let target_from = label_to + 2;
    let target_to = target_from + chars.get(target_from..)?.iter().position(|&c| c == ')')?;

    let label: String = chars[label_from..label_to].iter().collect();
    let target: String = chars[target_from..target_to].iter().collect();
    let document_id = target.strip_prefix(EMOJI_PFX)?.parse::<i64>().ok()?;

    Some((target_to + 1, document_id, label))
}
/// Tries to read an inline link of the form `[text](url)` starting at the `[`
/// found at index `start`.
///
/// Nested square brackets inside the link text are balanced; the URL runs up
/// to the first `)`. On success the link text is appended verbatim to `out`,
/// `*utf16_off` is advanced by its UTF-16 length, and the function returns the
/// index just past the closing `)` together with the entity:
/// `MentionName` when the URL is `tg://user?id=` followed by a valid `i64`,
/// otherwise `TextUrl` carrying the URL. The entity starts at `utf16_off_in`.
///
/// Returns `None`, leaving `out` and `*utf16_off` untouched, when the brackets
/// are unbalanced, `]` is not directly followed by `(`, or `)` is missing.
fn parse_link_entity(
    chars: &[char],
    start: usize,
    utf16_off_in: i32,
    out: &mut String,
    utf16_off: &mut i32,
) -> Option<(usize, tl::enums::MessageEntity)> {
    const MENTION_PFX: &str = "tg://user?id=";

    let total = chars.len();
    let label_from = start + 1;

    // Walk to the `]` that balances the opening `[`.
    let mut nesting = 1i32;
    let mut label_to = label_from;
    while label_to < total {
        match chars[label_to] {
            '[' => nesting += 1,
            ']' => {
                nesting -= 1;
                if nesting == 0 {
                    break;
                }
            }
            _ => {}
        }
        label_to += 1;
    }

    // Covers both "no balancing `]`" and "nothing after `]`".
    if label_to + 1 >= total || chars[label_to + 1] != '(' {
        return None;
    }

    // URL between `(` and the first `)`.
    let url_from = label_to + 2;
    let url_to = url_from + chars[url_from..].iter().position(|&c| c == ')')?;
    let url: String = chars[url_from..url_to].iter().collect();

    // Only now, with the whole link validated, touch the output.
    let offset = utf16_off_in;
    for &c in &chars[label_from..label_to] {
        out.push(c);
        *utf16_off += c.len_utf16() as i32;
    }
    let length = *utf16_off - offset;

    let mention_id = url
        .strip_prefix(MENTION_PFX)
        .and_then(|id| id.parse::<i64>().ok());
    let ent = match mention_id {
        Some(user_id) => {
            tl::enums::MessageEntity::MentionName(tl::types::MessageEntityMentionName {
                offset,
                length,
                user_id,
            })
        }
        None => tl::enums::MessageEntity::TextUrl(tl::types::MessageEntityTextUrl {
            offset,
            length,
            url,
        }),
    };
    Some((url_to + 1, ent))
}