use crate::types::{Message, MessageEntity};
/// A message entity resolved against the text it annotates.
///
/// Unlike the raw [`MessageEntity`], whose `offset`/`length` are read as
/// UTF-16 code-unit counts, the `offset` and `length` stored here are UTF-8
/// byte counts, and the covered substring is already decoded into `text`.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedEntity {
    /// Entity type string, copied from the source entity's `type` field.
    pub kind: String,
    /// The decoded substring the entity covers.
    pub text: String,
    /// UTF-8 byte length of the text that precedes the entity.
    pub offset: usize,
    /// UTF-8 byte length of `text`.
    pub length: usize,
    /// The entity's own URL when it has one; for entities of type `"url"`
    /// without one, a copy of `text`. `None` otherwise.
    pub url: Option<String>,
    /// User attached to the source entity, if any.
    pub user: Option<Box<crate::types::User>>,
    /// Language attached to the source entity, if any.
    pub language: Option<String>,
    /// Custom emoji identifier attached to the source entity, if any.
    pub custom_emoji_id: Option<String>,
}
/// Resolves a single entity against `text`.
///
/// `text` is re-encoded as UTF-16 so that the entity's `offset`/`length`
/// (read as UTF-16 code-unit counts) can be applied; the returned
/// [`ParsedEntity`] reports the covered substring together with its position
/// in UTF-8 bytes.
///
/// When resolving several entities of the same text, prefer
/// [`parse_entities`], which encodes the text only once.
pub fn parse_entity(text: &str, entity: &MessageEntity) -> ParsedEntity {
    let units = utf16_encode(text);
    parse_entity_from_utf16(units.as_slice(), entity)
}
/// Resolves every entity in `entities` against `text`.
///
/// The text is encoded to UTF-16 once and shared by all entities. The output
/// has one [`ParsedEntity`] per input entity, in the same order.
pub fn parse_entities(text: &str, entities: &[MessageEntity]) -> Vec<ParsedEntity> {
    let units = utf16_encode(text);
    let mut resolved = Vec::with_capacity(entities.len());
    for entity in entities {
        resolved.push(parse_entity_from_utf16(&units, entity));
    }
    resolved
}
/// Returns the UTF-16 code units of `s`, in order.
fn utf16_encode(s: &str) -> Vec<u16> {
    let mut units: Vec<u16> = Vec::new();
    units.extend(s.encode_utf16());
    units
}
/// Resolves one entity against text that has already been encoded as UTF-16.
///
/// `entity.offset` and `entity.length` are interpreted as UTF-16 code-unit
/// counts into `utf16`. The returned [`ParsedEntity`] contains the decoded
/// substring, plus `offset`/`length` re-expressed in UTF-8 bytes so they can
/// be used to slice the original `&str`.
///
/// The `url` field is the entity's own URL when present; otherwise, for
/// entities of type `"url"`, it is the covered text itself.
///
/// # Out-of-range input
///
/// This function never panics on bad ranges: a negative offset or length is
/// treated as zero, and anything reaching past the end of `utf16` is clamped
/// to the end.
///
/// If a range cuts a surrogate pair in half, the orphaned half is decoded as
/// U+FFFD; the reported byte offset/length then describe that lossy decoding
/// and may not fall on a character boundary of the original text.
fn parse_entity_from_utf16(utf16: &[u16], entity: &MessageEntity) -> ParsedEntity {
    let total = utf16.len();

    // Map a signed value onto `0..=total`. A plain `as usize` cast would turn
    // a negative number into a huge index, which previously overflowed the
    // `offset + length` addition or produced an inverted slice range.
    let clamp = |value: i64| -> usize {
        if value <= 0 {
            0
        } else if (value as u64) >= (total as u64) {
            total
        } else {
            // Strictly below `total`, so it fits in `usize`.
            value as usize
        }
    };

    let start16 = clamp(entity.offset);
    let end16 = start16.saturating_add(clamp(entity.length)).min(total);

    let entity_text = String::from_utf16_lossy(&utf16[start16..end16]);
    // The UTF-8 size of everything before the entity is its byte offset.
    let utf8_start = String::from_utf16_lossy(&utf16[..start16]).len();
    let utf8_len = entity_text.len();

    let url = match &entity.url {
        Some(explicit) => Some(explicit.clone()),
        None if entity.r#type == "url" => Some(entity_text.clone()),
        None => None,
    };

    ParsedEntity {
        kind: entity.r#type.clone(),
        text: entity_text,
        offset: utf8_start,
        length: utf8_len,
        url,
        user: entity.user.clone(),
        language: entity.language.clone(),
        custom_emoji_id: entity.custom_emoji_id.clone(),
    }
}
/// Convenience methods for resolving the entities carried by a message-like
/// value into [`ParsedEntity`] values.
pub trait MessageEntityExt {
    /// Resolves the entities that annotate the value's text.
    fn parse_entities(&self) -> Vec<ParsedEntity>;

    /// Resolves the entities that annotate the value's caption.
    fn parse_caption_entities(&self) -> Vec<ParsedEntity>;

    /// Resolves both text and caption entities into a single list.
    fn parse_any_entities(&self) -> Vec<ParsedEntity>;
}
impl MessageEntityExt for Message {
    /// Resolves `self.entities` against `self.text`.
    ///
    /// Returns an empty list unless both fields are present.
    fn parse_entities(&self) -> Vec<ParsedEntity> {
        if let (Some(text), Some(entities)) = (&self.text, &self.entities) {
            return parse_entities(text, entities);
        }
        Vec::new()
    }

    /// Resolves `self.caption_entities` against `self.caption`.
    ///
    /// Returns an empty list unless both fields are present.
    fn parse_caption_entities(&self) -> Vec<ParsedEntity> {
        if let (Some(caption), Some(entities)) = (&self.caption, &self.caption_entities) {
            return parse_entities(caption, entities);
        }
        Vec::new()
    }

    /// Returns the text entities followed by the caption entities.
    ///
    /// Offsets of the two groups refer to different strings (text vs.
    /// caption), so the combined list is not meant for slicing either one
    /// blindly.
    fn parse_any_entities(&self) -> Vec<ParsedEntity> {
        self.parse_entities()
            .into_iter()
            .chain(self.parse_caption_entities())
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::MessageEntity;

    /// Builds an entity of the given type and UTF-16 range with every
    /// optional field left unset.
    fn make_entity(kind: &str, offset: i64, length: i64) -> MessageEntity {
        MessageEntity {
            r#type: String::from(kind),
            offset,
            length,
            url: None,
            user: None,
            language: None,
            custom_emoji_id: None,
            unix_time: None,
            date_time_format: None,
        }
    }

    // Plain ASCII: UTF-16 and UTF-8 offsets coincide.
    #[test]
    fn ascii_entity() {
        let resolved = parse_entity("Hello world", &make_entity("bold", 6, 5));
        assert_eq!(resolved.text, "world");
        assert_eq!(resolved.offset, 6);
    }

    // One emoji is two UTF-16 units; the byte range reported must slice the
    // original string back to the same emoji.
    #[test]
    fn emoji_entity_utf16() {
        let text = "Hi 😀 there";
        let entity = make_entity("bold", 3, 2);
        let resolved = parse_entity(text, &entity);
        assert_eq!(resolved.text, "😀");
        let byte_range = resolved.offset..resolved.offset + resolved.length;
        assert_eq!(&text[byte_range], "😀");
    }

    // The second emoji starts after the first one's surrogate pair.
    #[test]
    fn multi_emoji() {
        let resolved = parse_entity("😀😀", &make_entity("italic", 2, 2));
        assert_eq!(resolved.text, "😀");
    }

    // A "url" entity without an explicit URL reports its own text as the URL.
    #[test]
    fn url_entity_fills_url_field() {
        let resolved = parse_entity("https://example.com", &make_entity("url", 0, 19));
        assert_eq!(resolved.url, Some("https://example.com".to_string()));
    }

    // No entities in, no entities out.
    #[test]
    fn empty_entities() {
        let resolved = parse_entities("some text", &[]);
        assert!(resolved.is_empty());
    }
}