1use regex::Regex;
6use std::sync::LazyLock;
7use unicode_segmentation::UnicodeSegmentation;
8
9#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11#[serde(rename_all = "camelCase")]
12pub struct Facet {
13 pub index: ByteSlice,
14 pub features: Vec<FacetFeature>,
15}
16
17#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
19#[serde(rename_all = "camelCase")]
20pub struct ByteSlice {
21 pub byte_start: usize,
22 pub byte_end: usize,
23}
24
25#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
27#[serde(tag = "$type")]
28pub enum FacetFeature {
29 #[serde(rename = "app.bsky.richtext.facet#mention")]
30 Mention { did: String },
31 #[serde(rename = "app.bsky.richtext.facet#link")]
32 Link { uri: String },
33 #[serde(rename = "app.bsky.richtext.facet#tag")]
34 Tag { tag: String },
35}
36
37#[derive(Debug, Clone)]
39pub struct RichTextSegment {
40 pub text: String,
41 pub facet: Option<Facet>,
42}
43
44impl RichTextSegment {
45 #[must_use]
46 pub fn is_mention(&self) -> bool {
47 self.facet.as_ref().is_some_and(|f| {
48 f.features
49 .iter()
50 .any(|feat| matches!(feat, FacetFeature::Mention { .. }))
51 })
52 }
53
54 #[must_use]
55 pub fn is_link(&self) -> bool {
56 self.facet.as_ref().is_some_and(|f| {
57 f.features
58 .iter()
59 .any(|feat| matches!(feat, FacetFeature::Link { .. }))
60 })
61 }
62
63 #[must_use]
64 pub fn is_tag(&self) -> bool {
65 self.facet.as_ref().is_some_and(|f| {
66 f.features
67 .iter()
68 .any(|feat| matches!(feat, FacetFeature::Tag { .. }))
69 })
70 }
71}
72
73#[derive(Debug, Clone)]
78pub struct RichText {
79 text: String,
80 facets: Vec<Facet>,
81}
82
83impl RichText {
84 pub fn new(text: impl Into<String>, facets: Option<Vec<Facet>>) -> Self {
86 let text = text.into();
87 let mut facets = facets.unwrap_or_default();
88 facets.retain(|f| f.index.byte_start < f.index.byte_end);
90 facets.sort_by_key(|f| f.index.byte_start);
91 Self { text, facets }
92 }
93
94 #[must_use]
96 pub fn text(&self) -> &str {
97 &self.text
98 }
99
100 #[must_use]
102 pub fn facets(&self) -> &[Facet] {
103 &self.facets
104 }
105
106 #[must_use]
108 pub fn len(&self) -> usize {
109 self.text.len()
110 }
111
112 #[must_use]
114 pub fn is_empty(&self) -> bool {
115 self.text.is_empty()
116 }
117
118 #[must_use]
120 pub fn grapheme_length(&self) -> usize {
121 self.text.graphemes(true).count()
122 }
123
124 pub fn detect_facets(&mut self) {
128 self.facets = detect_facets(&self.text);
129 }
130
131 pub fn insert(&mut self, index: usize, insert_text: &str) {
133 let added = insert_text.len();
134 self.text.insert_str(index, insert_text);
135
136 for facet in &mut self.facets {
137 if index <= facet.index.byte_start {
138 facet.index.byte_start += added;
140 facet.index.byte_end += added;
141 } else if index < facet.index.byte_end {
142 facet.index.byte_end += added;
144 }
145 }
147 }
148
149 pub fn delete(&mut self, start: usize, end: usize) {
151 let removed = end - start;
152
153 self.text.replace_range(start..end, "");
155
156 for facet in &mut self.facets {
157 let fs = facet.index.byte_start;
158 let fe = facet.index.byte_end;
159
160 if start <= fs && end >= fe {
161 facet.index.byte_start = start;
163 facet.index.byte_end = start;
164 } else if start >= fe {
165 } else if start > fs && end >= fe {
167 facet.index.byte_end = start;
169 } else if start > fs && end < fe {
170 facet.index.byte_end -= removed;
172 } else if start <= fs && end > fs && end < fe {
173 facet.index.byte_start = start;
175 facet.index.byte_end -= removed;
176 } else if end <= fs {
177 facet.index.byte_start -= removed;
179 facet.index.byte_end -= removed;
180 }
181 }
182
183 self.facets
185 .retain(|f| f.index.byte_start < f.index.byte_end);
186 }
187
188 #[must_use]
190 pub fn segments(&self) -> Vec<RichTextSegment> {
191 if self.facets.is_empty() {
192 return vec![RichTextSegment {
193 text: self.text.clone(),
194 facet: None,
195 }];
196 }
197
198 let mut segments = Vec::new();
199 let mut cursor = 0;
200
201 for facet in &self.facets {
202 let start = facet.index.byte_start;
203 let end = facet.index.byte_end.min(self.text.len());
204
205 if cursor < start {
207 segments.push(RichTextSegment {
208 text: self.text[cursor..start].to_string(),
209 facet: None,
210 });
211 }
212
213 let seg_text = &self.text[start..end];
215 if seg_text.trim().is_empty() {
216 segments.push(RichTextSegment {
217 text: seg_text.to_string(),
218 facet: None,
219 });
220 } else {
221 segments.push(RichTextSegment {
222 text: seg_text.to_string(),
223 facet: Some(facet.clone()),
224 });
225 }
226
227 cursor = end;
228 }
229
230 if cursor < self.text.len() {
232 segments.push(RichTextSegment {
233 text: self.text[cursor..].to_string(),
234 facet: None,
235 });
236 }
237
238 segments
239 }
240}
241
242static MENTION_RE: LazyLock<Regex> = LazyLock::new(|| {
245 Regex::new(r"(?:^|\s|\()(@)([a-zA-Z0-9]([a-zA-Z0-9.-]*[a-zA-Z0-9])?\.[a-zA-Z]{2,})")
246 .expect("mention regex")
247});
248
249static URL_RE: LazyLock<Regex> =
250 LazyLock::new(|| Regex::new(r"(?:^|\s|\()(https?://[\S]+)").expect("url regex"));
251
252static TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
253 Regex::new(r"(?:^|\s)[##]([^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*[^\d\s\p{Punctuation}\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]+[^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*)")
254 .expect("tag regex")
255});
256
257pub fn detect_facets(text: &str) -> Vec<Facet> {
259 let mut facets = Vec::new();
260
261 for cap in MENTION_RE.captures_iter(text) {
263 let handle_match = cap.get(2).unwrap();
264 let handle = handle_match.as_str();
265
266 let at_match = cap.get(1).unwrap();
268 let byte_start = at_match.start();
269 let byte_end = handle_match.end();
270
271 facets.push(Facet {
272 index: ByteSlice {
273 byte_start,
274 byte_end,
275 },
276 features: vec![FacetFeature::Mention {
277 did: handle.to_string(),
278 }],
279 });
280 }
281
282 for cap in URL_RE.captures_iter(text) {
284 let url_match = cap.get(1).unwrap();
285 let mut uri = url_match.as_str().to_string();
286 let byte_start = url_match.start();
287 let mut byte_end = url_match.end();
288
289 while uri.ends_with(['.', ',', ';', ':', '!', '?']) {
291 uri.pop();
292 byte_end -= 1;
293 }
294
295 if uri.ends_with(')') && !uri.contains('(') {
297 uri.pop();
298 byte_end -= 1;
299 }
300
301 facets.push(Facet {
302 index: ByteSlice {
303 byte_start,
304 byte_end,
305 },
306 features: vec![FacetFeature::Link { uri }],
307 });
308 }
309
310 for cap in TAG_RE.captures_iter(text) {
312 let tag_match = cap.get(1).unwrap();
313 let tag = tag_match.as_str();
314
315 if tag.is_empty() || tag.len() > 64 {
317 continue;
318 }
319
320 let tag_trimmed = tag.trim_end_matches(|c: char| c.is_ascii_punctuation());
322 if tag_trimmed.is_empty() {
323 continue;
324 }
325
326 let full_match = cap.get(0).unwrap();
328 let hash_pos = full_match
330 .as_str()
331 .find('#')
332 .or_else(|| full_match.as_str().find('#'))
333 .unwrap_or(0);
334 let byte_start = full_match.start() + hash_pos;
335 let byte_end = byte_start + 1 + tag_trimmed.len(); let byte_end = byte_end.min(text.len());
339
340 facets.push(Facet {
341 index: ByteSlice {
342 byte_start,
343 byte_end,
344 },
345 features: vec![FacetFeature::Tag {
346 tag: tag_trimmed.to_string(),
347 }],
348 });
349 }
350
351 facets.sort_by_key(|f| f.index.byte_start);
352 facets
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds rich text from `text` and runs facet detection on it.
    fn detected(text: &str) -> RichText {
        let mut rich = RichText::new(text, None);
        rich.detect_facets();
        rich
    }

    /// Builds a facet over `byte_start..byte_end` carrying one feature.
    fn facet(byte_start: usize, byte_end: usize, feature: FacetFeature) -> Facet {
        Facet {
            index: ByteSlice {
                byte_start,
                byte_end,
            },
            features: vec![feature],
        }
    }

    /// The tag feature shared by the insert/delete fixtures.
    fn test_tag() -> FacetFeature {
        FacetFeature::Tag {
            tag: "test".to_string(),
        }
    }

    /// Returns the slice of the text that `facet` covers.
    fn covered<'a>(rich: &'a RichText, facet: &Facet) -> &'a str {
        &rich.text()[facet.index.byte_start..facet.index.byte_end]
    }

    /// Asserts that `text` yields exactly one link facet with `expected_uri`.
    fn assert_single_link(text: &str, expected_uri: &str) {
        let rich = detected(text);
        assert_eq!(rich.facets().len(), 1);
        let only = &rich.facets()[0];
        assert!(matches!(&only.features[0], FacetFeature::Link { uri } if uri == expected_uri));
    }

    /// Asserts the byte bounds of the first facet.
    fn assert_first_facet_range(rich: &RichText, byte_start: usize, byte_end: usize) {
        let index = &rich.facets()[0].index;
        assert_eq!(index.byte_start, byte_start);
        assert_eq!(index.byte_end, byte_end);
    }

    #[test]
    fn basic_text_no_facets() {
        let rich = RichText::new("Hello, world!", None);
        assert_eq!(rich.text(), "Hello, world!");
        assert!(rich.facets().is_empty());
        assert_eq!(rich.len(), 13);
        assert_eq!(rich.grapheme_length(), 13);
    }

    #[test]
    fn detect_mention() {
        let rich = detected("Hello @alice.bsky.social!");
        assert_eq!(rich.facets().len(), 1);
        let mention = &rich.facets()[0];
        assert!(matches!(
            &mention.features[0],
            FacetFeature::Mention { did } if did == "alice.bsky.social"
        ));
        assert_eq!(covered(&rich, mention), "@alice.bsky.social");
    }

    #[test]
    fn detect_url() {
        assert_single_link(
            "Check https://example.com/path here",
            "https://example.com/path",
        );
    }

    #[test]
    fn detect_url_strips_trailing_punctuation() {
        assert_single_link("Visit https://example.com.", "https://example.com");
    }

    #[test]
    fn detect_url_strips_trailing_paren_without_open() {
        assert_single_link(
            "(see https://example.com/page)",
            "https://example.com/page",
        );
    }

    #[test]
    fn detect_hashtag() {
        let rich = detected("Hello #atproto world");
        assert_eq!(rich.facets().len(), 1);
        let hashtag = &rich.facets()[0];
        assert!(matches!(&hashtag.features[0], FacetFeature::Tag { tag } if tag == "atproto"));
    }

    #[test]
    fn detect_multiple_facets() {
        let rich = detected("@alice.test posted https://example.com #cool");
        let found = rich.facets();
        assert_eq!(found.len(), 3);

        let is_mention = |feature: &FacetFeature| matches!(feature, FacetFeature::Mention { .. });
        let is_link = |feature: &FacetFeature| matches!(feature, FacetFeature::Link { .. });
        let is_tag = |feature: &FacetFeature| matches!(feature, FacetFeature::Tag { .. });

        assert!(found[0].features.iter().any(is_mention));
        assert!(found[1].features.iter().any(is_link));
        assert!(found[2].features.iter().any(is_tag));
    }

    #[test]
    fn segments_no_facets() {
        let pieces = RichText::new("Hello world", None).segments();
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].text, "Hello world");
        assert!(pieces[0].facet.is_none());
    }

    #[test]
    fn segments_with_facets() {
        let pieces = detected("Hello @alice.test world").segments();
        assert_eq!(pieces.len(), 3);

        assert_eq!(pieces[0].text, "Hello ");
        assert!(pieces[0].facet.is_none());

        assert_eq!(pieces[1].text, "@alice.test");
        assert!(pieces[1].is_mention());

        assert_eq!(pieces[2].text, " world");
        assert!(pieces[2].facet.is_none());
    }

    #[test]
    fn insert_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![facet(6, 11, test_tag())]));
        rich.insert(0, "Hey ");
        assert_eq!(rich.text(), "Hey Hello #test");
        assert_first_facet_range(&rich, 10, 15);
    }

    #[test]
    fn insert_inside_facet() {
        let link = FacetFeature::Link {
            uri: "https://example.com".to_string(),
        };
        let mut rich = RichText::new("Hello world", Some(vec![facet(0, 5, link)]));
        rich.insert(3, "XX");
        assert_eq!(rich.text(), "HelXXlo world");
        assert_first_facet_range(&rich, 0, 7);
    }

    #[test]
    fn delete_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![facet(6, 11, test_tag())]));
        rich.delete(0, 6);
        assert_eq!(rich.text(), "#test");
        assert_first_facet_range(&rich, 0, 5);
    }

    #[test]
    fn delete_spanning_facet_removes_it() {
        let mut rich = RichText::new("Hello #test world", Some(vec![facet(6, 11, test_tag())]));
        rich.delete(5, 12);
        assert_eq!(rich.text(), "Helloworld");
        assert!(rich.facets().is_empty());
    }

    #[test]
    fn grapheme_length_emoji() {
        let rich = RichText::new("Hi 👋🏽", None);
        // The emoji with its modifier is one grapheme but several bytes.
        assert_eq!(rich.grapheme_length(), 4);
        assert!(rich.len() > 4);
    }

    #[test]
    fn utf8_byte_offsets_work_natively() {
        let rich = detected("Héllo @alice.test");
        assert_eq!(rich.facets().len(), 1);
        let mention = &rich.facets()[0];
        assert_eq!(covered(&rich, mention), "@alice.test");
    }

    #[test]
    fn empty_text() {
        let rich = RichText::new("", None);
        assert!(rich.is_empty());
        assert_eq!(rich.len(), 0);
        assert_eq!(rich.grapheme_length(), 0);

        let pieces = rich.segments();
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].text, "");
    }

    #[test]
    fn facet_feature_serde_roundtrip() {
        let original = facet(
            0,
            5,
            FacetFeature::Mention {
                did: "did:plc:abc123".to_string(),
            },
        );

        let json = serde_json::to_string(&original).unwrap();
        assert!(json.contains("app.bsky.richtext.facet#mention"));

        let parsed: Facet = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.index.byte_start, 0);
        assert!(matches!(
            &parsed.features[0],
            FacetFeature::Mention { did } if did == "did:plc:abc123"
        ));
    }
}