#![cfg(feature = "markdown")]
use grammers_tl_types as tl;
use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
use super::common::{
MENTION_URL_PREFIX, Segment, after, before, inject_into_message, telegram_string_len,
update_entity_len,
};
pub fn parse_markdown_message(message: &str) -> (String, Vec<tl::enums::MessageEntity>) {
let mut text = String::with_capacity(message.len());
let mut entities = Vec::new();
let mut offset = 0;
let length = 0;
Parser::new(message).for_each(|event| match event {
Event::Text(string) => {
text.push_str(&string);
offset += telegram_string_len(&string);
}
Event::Code(string) => {
text.push_str(&string);
let length = telegram_string_len(&string);
entities.push(tl::types::MessageEntityCode { offset, length }.into());
offset += length;
}
Event::Start(Tag::Strong) => {
entities.push(tl::types::MessageEntityBold { offset, length }.into());
}
Event::End(TagEnd::Strong) => {
update_entity_len!(Bold(offset) in entities);
}
Event::Start(Tag::Emphasis) => {
entities.push(tl::types::MessageEntityItalic { offset, length }.into());
}
Event::End(TagEnd::Emphasis) => {
update_entity_len!(Italic(offset) in entities);
}
Event::Start(Tag::Link { dest_url, .. }) => {
if dest_url.starts_with(MENTION_URL_PREFIX) {
let user_id = dest_url[MENTION_URL_PREFIX.len()..].parse::<i64>().unwrap();
entities.push(
tl::types::MessageEntityMentionName {
offset,
length,
user_id,
}
.into(),
);
} else {
entities.push(
tl::types::MessageEntityTextUrl {
offset,
length,
url: dest_url.to_string(),
}
.into(),
);
}
}
Event::End(TagEnd::Link) => {
let is_mention = entities
.iter()
.rev()
.find_map(|e| match e {
tl::enums::MessageEntity::MentionName(_) => Some(true),
tl::enums::MessageEntity::TextUrl(_) => Some(false),
_ => None,
})
.unwrap_or(false);
if is_mention {
update_entity_len!(MentionName(offset) in entities);
} else {
update_entity_len!(TextUrl(offset) in entities);
}
}
Event::Start(Tag::CodeBlock(kind)) => {
let lang = match kind {
CodeBlockKind::Indented => "".to_string(),
CodeBlockKind::Fenced(lang) => lang.to_string(),
}
.to_string();
entities.push(
tl::types::MessageEntityPre {
offset,
length,
language: lang,
}
.into(),
);
}
Event::End(TagEnd::CodeBlock) => {
update_entity_len!(Pre(offset) in entities);
}
Event::HardBreak => {
text.push('\n');
offset += 1;
}
Event::End(TagEnd::Paragraph) => {
text.push_str("\n\n");
offset += 2;
}
_ => {}
});
text.truncate(text.trim_end().len());
(text, entities)
}
/// Rebuilds a markdown string from the plain `message` text and the
/// `entities` that describe its formatting.
///
/// Markers emitted per entity kind:
/// - bold: `**` on both sides
/// - italic: `_` on both sides
/// - code: a backtick on both sides
/// - pre: an opening fence (followed by the language when it is not empty)
///   and a closing fence, each terminated by a newline
/// - text URL: `[text](url)`
/// - mention name: `[text](tg://user?id=ID)`
///
/// Every other entity kind contributes no markup. The collected insertions
/// are handed to `inject_into_message`, which produces the final string.
pub fn generate_markdown_message(message: &str, entities: &[tl::enums::MessageEntity]) -> String {
    use tl::enums::MessageEntity as ME;

    // Pre-size the insertion list. Being slightly off would only cost a
    // reallocation, so unsupported kinds fall under the wildcard.
    let capacity: usize = entities
        .iter()
        .map(|entity| match entity {
            ME::Bold(_) | ME::Italic(_) | ME::Code(_) => 2,
            ME::Pre(pre) if pre.language.is_empty() => 2,
            ME::Pre(_) | ME::TextUrl(_) | ME::MentionName(_) => 4,
            _ => 0,
        })
        .sum();
    let mut insertions = Vec::with_capacity(capacity);

    for (index, entity) in entities.iter().enumerate() {
        match entity {
            ME::Bold(bold) => {
                let (start, end) = (bold.offset, bold.offset + bold.length);
                insertions.push((before(index, 0, start), Segment::Fixed("**")));
                insertions.push((after(index, 0, end), Segment::Fixed("**")));
            }
            ME::Italic(italic) => {
                let (start, end) = (italic.offset, italic.offset + italic.length);
                insertions.push((before(index, 0, start), Segment::Fixed("_")));
                insertions.push((after(index, 0, end), Segment::Fixed("_")));
            }
            ME::Code(code) => {
                let (start, end) = (code.offset, code.offset + code.length);
                insertions.push((before(index, 0, start), Segment::Fixed("`")));
                insertions.push((after(index, 0, end), Segment::Fixed("`")));
            }
            ME::Pre(pre) => {
                let (start, end) = (pre.offset, pre.offset + pre.length);
                if pre.language.is_empty() {
                    insertions.push((before(index, 0, start), Segment::Fixed("```\n")));
                } else {
                    // Opening fence, then the language, then the newline.
                    insertions.push((before(index, 0, start), Segment::Fixed("```")));
                    insertions.push((before(index, 1, start), Segment::String(&pre.language)));
                    insertions.push((before(index, 2, start), Segment::Fixed("\n")));
                }
                insertions.push((after(index, 0, end), Segment::Fixed("```\n")));
            }
            ME::TextUrl(link) => {
                let (start, end) = (link.offset, link.offset + link.length);
                insertions.push((before(index, 0, start), Segment::Fixed("[")));
                insertions.push((after(index, 0, end), Segment::Fixed("](")));
                insertions.push((after(index, 1, end), Segment::String(&link.url)));
                insertions.push((after(index, 2, end), Segment::Fixed(")")));
            }
            ME::MentionName(mention) => {
                let (start, end) = (mention.offset, mention.offset + mention.length);
                insertions.push((before(index, 0, start), Segment::Fixed("[")));
                insertions.push((after(index, 0, end), Segment::Fixed("](tg://user?id=")));
                insertions.push((after(index, 1, end), Segment::Number(mention.user_id)));
                insertions.push((after(index, 2, end), Segment::Fixed(")")));
            }
            // Kinds without a markdown representation here. They are listed
            // explicitly (no wildcard) so a newly added variant fails to
            // compile until it is handled.
            ME::Unknown(_)
            | ME::Mention(_)
            | ME::Hashtag(_)
            | ME::BotCommand(_)
            | ME::Url(_)
            | ME::Email(_)
            | ME::InputMessageEntityMentionName(_)
            | ME::Phone(_)
            | ME::Cashtag(_)
            | ME::Underline(_)
            | ME::Strike(_)
            | ME::Blockquote(_)
            | ME::BankCard(_)
            | ME::Spoiler(_)
            | ME::CustomEmoji(_) => {}
        }
    }

    inject_into_message(message, insertions)
}
#[cfg(test)]
mod tests {
    use super::*;

    type Entity = tl::enums::MessageEntity;

    /// Builds a bold entity.
    fn bold(offset: i32, length: i32) -> Entity {
        tl::types::MessageEntityBold { offset, length }.into()
    }

    /// Builds an italic entity.
    fn italic(offset: i32, length: i32) -> Entity {
        tl::types::MessageEntityItalic { offset, length }.into()
    }

    /// Builds an inline code entity.
    fn code(offset: i32, length: i32) -> Entity {
        tl::types::MessageEntityCode { offset, length }.into()
    }

    /// Builds a code block entity with the given language.
    fn pre(offset: i32, length: i32, language: &str) -> Entity {
        tl::types::MessageEntityPre {
            offset,
            length,
            language: language.to_string(),
        }
        .into()
    }

    /// Builds a text URL entity.
    fn text_url(offset: i32, length: i32, url: &str) -> Entity {
        tl::types::MessageEntityTextUrl {
            offset,
            length,
            url: url.to_string(),
        }
        .into()
    }

    /// Builds a mention-by-ID entity.
    fn mention_name(offset: i32, length: i32, user_id: i64) -> Entity {
        tl::types::MessageEntityMentionName {
            offset,
            length,
            user_id,
        }
        .into()
    }

    /// Parses `markdown`, generates markdown back from the result, and
    /// checks that the output equals the input.
    fn assert_round_trip(markdown: &str) {
        let (text, entities) = parse_markdown_message(markdown);
        let generated = generate_markdown_message(&text, &entities);
        assert_eq!(generated, markdown);
    }

    /// Formatting at the very start of the message.
    #[test]
    fn parse_leading() {
        let (text, entities) = parse_markdown_message("**Hello** world!");
        assert_eq!(text, "Hello world!");
        assert_eq!(entities, vec![bold(0, 5)]);
    }

    /// Formatting at the very end of the message.
    #[test]
    fn parse_trailing() {
        let (text, entities) = parse_markdown_message("Hello **world!**");
        assert_eq!(text, "Hello world!");
        assert_eq!(entities, vec![bold(6, 6)]);
    }

    /// The crab emoji contributes two units to the entity length.
    #[test]
    fn parse_emoji() {
        let (text, entities) = parse_markdown_message("A **little 🦀** here");
        assert_eq!(text, "A little 🦀 here");
        assert_eq!(entities, vec![bold(2, 9)]);
    }

    /// One message exercising every supported entity kind.
    #[test]
    fn parse_all_entities() {
        let (text, entities) = parse_markdown_message(
            "Some **bold** (__strong__), *italics* (_cursive_), inline `code`, \
            a\n```rust\npre\n```\nblock, a [**link**](https://example.com), and \
            [mentions](tg://user?id=12345678)",
        );
        assert_eq!(
            text,
            "Some bold (strong), italics (cursive), inline code, a\n\npre\nblock, a link, and mentions"
        );

        let expected = vec![
            bold(5, 4),
            bold(11, 6),
            italic(20, 7),
            italic(29, 7),
            code(46, 4),
            pre(55, 4, "rust"),
            text_url(68, 4, "https://example.com"),
            bold(68, 4),
            mention_name(78, 8, 12345678),
        ];
        assert_eq!(entities, expected);
    }

    /// Italic nested inside bold, followed by a standalone italic.
    #[test]
    fn parse_nested_entities() {
        let (text, entities) = parse_markdown_message("Some **bold _both_** _italics_");
        assert_eq!(text, "Some bold both italics");
        assert_eq!(entities, vec![bold(5, 9), italic(10, 4), italic(15, 7)]);
    }

    /// Parsing and regenerating must reproduce the original markdown.
    #[test]
    fn parse_then_unparse() {
        assert_round_trip(
            "Some **bold 🤷🏽♀️**, _italics_, inline `🤷🏽♀️ code`, \
            a\n\n```rust\npre\n```\nblock, a [**link**](https://example.com), and \
            [mentions](tg://user?id=12345678)",
        );
    }

    /// Round trip with adjacent entities that share boundaries.
    #[test]
    fn parse_then_unparse_overlapping() {
        assert_round_trip("_a_[**b**](https://example.com)`c`");
    }
}