1use std::{cmp, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::types::{User, UserId};
6
7#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
13pub struct MessageEntity {
14 #[serde(flatten)]
15 pub kind: MessageEntityKind,
16
17 pub offset: usize,
19
20 pub length: usize,
22}
23
24#[derive(Clone, Debug, Eq, Hash, PartialEq)]
39pub struct MessageEntityRef<'a> {
40 message: &'a str,
41 range: Range<usize>,
42 kind: &'a MessageEntityKind,
43}
44
45impl MessageEntity {
46 #[must_use]
47 pub const fn new(kind: MessageEntityKind, offset: usize, length: usize) -> Self {
48 Self {
49 kind,
50 offset,
51 length,
52 }
53 }
54
55 #[must_use]
57 pub const fn bold(offset: usize, length: usize) -> Self {
58 Self {
59 kind: MessageEntityKind::Bold,
60 offset,
61 length,
62 }
63 }
64
65 #[must_use]
67 pub const fn italic(offset: usize, length: usize) -> Self {
68 Self {
69 kind: MessageEntityKind::Italic,
70 offset,
71 length,
72 }
73 }
74
75 #[must_use]
77 pub const fn underline(offset: usize, length: usize) -> Self {
78 Self {
79 kind: MessageEntityKind::Underline,
80 offset,
81 length,
82 }
83 }
84
85 #[must_use]
87 pub const fn strikethrough(offset: usize, length: usize) -> Self {
88 Self {
89 kind: MessageEntityKind::Strikethrough,
90 offset,
91 length,
92 }
93 }
94
95 #[must_use]
97 pub const fn spoiler(offset: usize, length: usize) -> Self {
98 Self {
99 kind: MessageEntityKind::Spoiler,
100 offset,
101 length,
102 }
103 }
104
105 #[must_use]
107 pub const fn code(offset: usize, length: usize) -> Self {
108 Self {
109 kind: MessageEntityKind::Code,
110 offset,
111 length,
112 }
113 }
114
115 #[must_use]
117 pub const fn pre(language: Option<String>, offset: usize, length: usize) -> Self {
118 Self {
119 kind: MessageEntityKind::Pre { language },
120 offset,
121 length,
122 }
123 }
124
125 #[must_use]
127 pub const fn text_link(url: url::Url, offset: usize, length: usize) -> Self {
128 Self {
129 kind: MessageEntityKind::TextLink { url },
130 offset,
131 length,
132 }
133 }
134
135 #[must_use]
142 pub const fn text_mention(user: User, offset: usize, length: usize) -> Self {
143 Self {
144 kind: MessageEntityKind::TextMention { user },
145 offset,
146 length,
147 }
148 }
149
150 #[must_use]
153 pub fn text_mention_id(user_id: UserId, offset: usize, length: usize) -> Self {
154 Self {
155 kind: MessageEntityKind::TextLink { url: user_id.url() },
156 offset,
157 length,
158 }
159 }
160
161 #[must_use]
163 pub const fn custom_emoji(custom_emoji_id: String, offset: usize, length: usize) -> Self {
164 Self {
165 kind: MessageEntityKind::CustomEmoji { custom_emoji_id },
166 offset,
167 length,
168 }
169 }
170
171 #[must_use]
172 pub fn kind(mut self, val: MessageEntityKind) -> Self {
173 self.kind = val;
174 self
175 }
176
177 #[must_use]
178 pub const fn offset(mut self, val: usize) -> Self {
179 self.offset = val;
180 self
181 }
182
183 #[must_use]
184 pub const fn length(mut self, val: usize) -> Self {
185 self.length = val;
186 self
187 }
188}
189
190impl<'a> MessageEntityRef<'a> {
191 #[must_use]
193 pub fn kind(&self) -> &'a MessageEntityKind {
194 self.kind
195 }
196
197 #[must_use]
199 pub fn text(&self) -> &'a str {
200 &self.message[self.range.clone()]
201 }
202
203 #[must_use]
208 pub fn range(&self) -> Range<usize> {
209 self.range.clone()
210 }
211
212 #[must_use]
215 pub fn start(&self) -> usize {
216 self.range.start
217 }
218
219 #[must_use]
222 pub fn end(&self) -> usize {
223 self.range.end
224 }
225
226 #[allow(clippy::len_without_is_empty)]
228 #[must_use]
229 pub fn len(&self) -> usize {
230 self.range.len()
231 }
232
233 #[must_use]
235 pub fn message_text(&self) -> &'a str {
236 self.message
237 }
238
239 #[must_use]
241 pub fn parse(text: &'a str, entities: &'a [MessageEntity]) -> Vec<Self> {
242 let mut entities: Vec<_> = entities
244 .iter()
245 .map(|e| Self {
246 message: text,
247 range: e.offset..e.offset + e.length,
248 kind: &e.kind,
249 })
250 .collect();
251
252 let mut offsets: Vec<&mut usize> = entities
256 .iter_mut()
257 .flat_map(
258 |Self {
259 range: Range { start, end },
260 ..
261 }| [start, end],
262 )
263 .collect();
264
265 offsets.sort_unstable_by_key(|&&mut offset| cmp::Reverse(offset));
268
269 let _ = text
270 .chars()
271 .chain(['\0']) .try_fold((0, 0), |(len_utf8, len_utf16), c| {
273 if offsets.is_empty() {
275 return None;
276 }
277
278 while offsets
280 .last()
281 .map(|&&mut offset| offset <= len_utf16)
282 .unwrap_or(false)
283 {
284 let offset = offsets.pop().unwrap();
285 assert_eq!(*offset, len_utf16, "Invalid utf-16 offset");
286
287 *offset = len_utf8;
289 }
290
291 Some((len_utf8 + c.len_utf8(), len_utf16 + c.len_utf16()))
293 });
294
295 entities
296 }
297}
298
299#[serde_with_macros::skip_serializing_none]
300#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
301#[serde(rename_all = "snake_case")]
302#[serde(tag = "type")]
303pub enum MessageEntityKind {
304 Mention,
305 Hashtag,
306 Cashtag,
307 BotCommand,
308 Url,
309 Email,
310 PhoneNumber,
311 Bold,
312 Italic,
313 Underline,
314 Strikethrough,
315 Spoiler,
316 Code,
317 Pre { language: Option<String> },
318 TextLink { url: url::Url },
319 TextMention { user: User },
320 CustomEmoji { custom_emoji_id: String }, }
322
#[cfg(test)]
mod tests {
    use super::*;
    use cool_asserts::assert_matches;
    use MessageEntityKind::*;

    /// A kind with a payload deserializes from the flattened, tagged JSON form.
    #[test]
    fn recursive_kind() {
        let expected =
            MessageEntity::text_link(url::Url::parse("https://example.com").unwrap(), 1, 2);
        let actual: MessageEntity = serde_json::from_str(
            r#"{"type":"text_link","url":"https://example.com","offset":1,"length":2}"#,
        )
        .unwrap();

        assert_eq!(expected, actual);
    }

    /// `pre` picks up its `language` even when it comes after the position fields.
    #[test]
    fn pre() {
        let expected = MessageEntity::pre(Some("rust".to_string()), 1, 2);
        let actual: MessageEntity =
            serde_json::from_str(r#"{"type":"pre","offset":1,"length":2,"language":"rust"}"#)
                .unwrap();

        assert_eq!(expected, actual);
    }

    /// A missing language must not show up in the serialized entity at all.
    #[test]
    fn pre_with_none_language() {
        let json = serde_json::to_string(&MessageEntity::pre(None, 1, 2)).unwrap();

        assert!(!json.contains("language"));
    }

    /// Two-byte characters: every UTF-16 unit maps to two UTF-8 bytes.
    #[test]
    fn parse_быба() {
        let entities = [
            MessageEntity::new(Strikethrough, 0, 1),
            MessageEntity::new(Bold, 1, 1),
            MessageEntity::new(Italic, 2, 1),
            MessageEntity::new(Code, 3, 1),
        ];
        let parsed = MessageEntityRef::parse("быба", &entities);

        assert_matches!(
            parsed,
            [
                entity if entity.text() == "б" && entity.kind() == &Strikethrough,
                entity if entity.text() == "ы" && entity.kind() == &Bold,
                entity if entity.text() == "б" && entity.kind() == &Italic,
                entity if entity.text() == "а" && entity.kind() == &Code,
            ]
        );
    }

    /// A three-byte character before the entity shifts the byte offsets.
    #[test]
    fn parse_symbol_24bit() {
        let entities = [MessageEntity::new(Hashtag, 5, 3)];
        let parsed = MessageEntityRef::parse("xx আ #tt", &entities);

        assert_matches!(
            parsed,
            [entity if entity.text() == "#tt" && entity.kind() == &Hashtag]
        );
    }

    /// Entities may share a start position and be nested in one another.
    #[test]
    fn parse_enclosed() {
        let entities = [
            MessageEntity::new(Bold, 0, 2),
            MessageEntity::new(Bold, 2, 3),
            MessageEntity::new(Italic, 2, 1),
        ];
        let parsed = MessageEntityRef::parse("b i b", &entities);

        assert_matches!(
            parsed,
            [
                entity if entity.text() == "b " && entity.kind() == &Bold,
                entity if entity.text() == "i b" && entity.kind() == &Bold,
                entity if entity.text() == "i" && entity.kind() == &Italic,
            ]
        );
    }

    /// No entities in, no entities out.
    #[test]
    fn parse_nothing() {
        let parsed = MessageEntityRef::parse("a", &[]);

        assert!(parsed.is_empty());
    }

    /// Zero-length entities on an empty text are still returned.
    #[test]
    fn parse_empty() {
        let entities = [
            MessageEntity::new(Bold, 0, 0),
            MessageEntity::new(Italic, 0, 0),
        ];
        let parsed = MessageEntityRef::parse("", &entities);

        assert_matches!(
            parsed,
            [
                entity if entity.text() == "" && entity.kind() == &Bold,
                entity if entity.text() == "" && entity.kind() == &Italic,
            ]
        );
    }
}