1use regex::Regex;
6use std::sync::LazyLock;
7use unicode_segmentation::UnicodeSegmentation;
8
9#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11#[serde(rename_all = "camelCase")]
12pub struct Facet {
13 pub index: ByteSlice,
14 pub features: Vec<FacetFeature>,
15}
16
17#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
19#[serde(rename_all = "camelCase")]
20pub struct ByteSlice {
21 pub byte_start: usize,
22 pub byte_end: usize,
23}
24
25#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
27#[serde(tag = "$type")]
28pub enum FacetFeature {
29 #[serde(rename = "app.bsky.richtext.facet#mention")]
30 Mention { did: String },
31 #[serde(rename = "app.bsky.richtext.facet#link")]
32 Link { uri: String },
33 #[serde(rename = "app.bsky.richtext.facet#tag")]
34 Tag { tag: String },
35}
36
37#[derive(Debug, Clone)]
39pub struct RichTextSegment {
40 pub text: String,
41 pub facet: Option<Facet>,
42}
43
44impl RichTextSegment {
45 pub fn is_mention(&self) -> bool {
46 self.facet.as_ref().is_some_and(|f| {
47 f.features
48 .iter()
49 .any(|feat| matches!(feat, FacetFeature::Mention { .. }))
50 })
51 }
52
53 pub fn is_link(&self) -> bool {
54 self.facet.as_ref().is_some_and(|f| {
55 f.features
56 .iter()
57 .any(|feat| matches!(feat, FacetFeature::Link { .. }))
58 })
59 }
60
61 pub fn is_tag(&self) -> bool {
62 self.facet.as_ref().is_some_and(|f| {
63 f.features
64 .iter()
65 .any(|feat| matches!(feat, FacetFeature::Tag { .. }))
66 })
67 }
68}
69
70#[derive(Debug, Clone)]
75pub struct RichText {
76 text: String,
77 facets: Vec<Facet>,
78}
79
80impl RichText {
81 pub fn new(text: impl Into<String>, facets: Option<Vec<Facet>>) -> Self {
83 let text = text.into();
84 let mut facets = facets.unwrap_or_default();
85 facets.retain(|f| f.index.byte_start < f.index.byte_end);
87 facets.sort_by_key(|f| f.index.byte_start);
88 RichText { text, facets }
89 }
90
91 pub fn text(&self) -> &str {
93 &self.text
94 }
95
96 pub fn facets(&self) -> &[Facet] {
98 &self.facets
99 }
100
101 pub fn len(&self) -> usize {
103 self.text.len()
104 }
105
106 pub fn is_empty(&self) -> bool {
108 self.text.is_empty()
109 }
110
111 pub fn grapheme_length(&self) -> usize {
113 self.text.graphemes(true).count()
114 }
115
116 pub fn detect_facets(&mut self) {
120 self.facets = detect_facets(&self.text);
121 }
122
123 pub fn insert(&mut self, index: usize, insert_text: &str) {
125 let added = insert_text.len();
126 self.text.insert_str(index, insert_text);
127
128 for facet in &mut self.facets {
129 if index <= facet.index.byte_start {
130 facet.index.byte_start += added;
132 facet.index.byte_end += added;
133 } else if index < facet.index.byte_end {
134 facet.index.byte_end += added;
136 }
137 }
139 }
140
141 pub fn delete(&mut self, start: usize, end: usize) {
143 let removed = end - start;
144
145 self.text.replace_range(start..end, "");
147
148 for facet in &mut self.facets {
149 let fs = facet.index.byte_start;
150 let fe = facet.index.byte_end;
151
152 if start <= fs && end >= fe {
153 facet.index.byte_start = start;
155 facet.index.byte_end = start;
156 } else if start >= fe {
157 } else if start > fs && end >= fe {
159 facet.index.byte_end = start;
161 } else if start > fs && end < fe {
162 facet.index.byte_end -= removed;
164 } else if start <= fs && end > fs && end < fe {
165 facet.index.byte_start = start;
167 facet.index.byte_end -= removed;
168 } else if end <= fs {
169 facet.index.byte_start -= removed;
171 facet.index.byte_end -= removed;
172 }
173 }
174
175 self.facets
177 .retain(|f| f.index.byte_start < f.index.byte_end);
178 }
179
180 pub fn segments(&self) -> Vec<RichTextSegment> {
182 if self.facets.is_empty() {
183 return vec![RichTextSegment {
184 text: self.text.clone(),
185 facet: None,
186 }];
187 }
188
189 let mut segments = Vec::new();
190 let mut cursor = 0;
191
192 for facet in &self.facets {
193 let start = facet.index.byte_start;
194 let end = facet.index.byte_end.min(self.text.len());
195
196 if cursor < start {
198 segments.push(RichTextSegment {
199 text: self.text[cursor..start].to_string(),
200 facet: None,
201 });
202 }
203
204 let seg_text = &self.text[start..end];
206 if !seg_text.trim().is_empty() {
207 segments.push(RichTextSegment {
208 text: seg_text.to_string(),
209 facet: Some(facet.clone()),
210 });
211 } else {
212 segments.push(RichTextSegment {
213 text: seg_text.to_string(),
214 facet: None,
215 });
216 }
217
218 cursor = end;
219 }
220
221 if cursor < self.text.len() {
223 segments.push(RichTextSegment {
224 text: self.text[cursor..].to_string(),
225 facet: None,
226 });
227 }
228
229 segments
230 }
231}
232
233static MENTION_RE: LazyLock<Regex> = LazyLock::new(|| {
236 Regex::new(r"(?:^|\s|\()(@)([a-zA-Z0-9]([a-zA-Z0-9.-]*[a-zA-Z0-9])?\.[a-zA-Z]{2,})")
237 .expect("mention regex")
238});
239
240static URL_RE: LazyLock<Regex> =
241 LazyLock::new(|| Regex::new(r"(?:^|\s|\()(https?://[\S]+)").expect("url regex"));
242
243static TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
244 Regex::new(r"(?:^|\s)[##]([^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*[^\d\s\p{Punctuation}\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]+[^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*)")
245 .expect("tag regex")
246});
247
248pub fn detect_facets(text: &str) -> Vec<Facet> {
250 let mut facets = Vec::new();
251
252 for cap in MENTION_RE.captures_iter(text) {
254 let handle_match = cap.get(2).unwrap();
255 let handle = handle_match.as_str();
256
257 let at_match = cap.get(1).unwrap();
259 let byte_start = at_match.start();
260 let byte_end = handle_match.end();
261
262 facets.push(Facet {
263 index: ByteSlice {
264 byte_start,
265 byte_end,
266 },
267 features: vec![FacetFeature::Mention {
268 did: handle.to_string(),
269 }],
270 });
271 }
272
273 for cap in URL_RE.captures_iter(text) {
275 let url_match = cap.get(1).unwrap();
276 let mut uri = url_match.as_str().to_string();
277 let byte_start = url_match.start();
278 let mut byte_end = url_match.end();
279
280 while uri.ends_with(['.', ',', ';', ':', '!', '?']) {
282 uri.pop();
283 byte_end -= 1;
284 }
285
286 if uri.ends_with(')') && !uri.contains('(') {
288 uri.pop();
289 byte_end -= 1;
290 }
291
292 facets.push(Facet {
293 index: ByteSlice {
294 byte_start,
295 byte_end,
296 },
297 features: vec![FacetFeature::Link { uri }],
298 });
299 }
300
301 for cap in TAG_RE.captures_iter(text) {
303 let tag_match = cap.get(1).unwrap();
304 let tag = tag_match.as_str();
305
306 if tag.is_empty() || tag.len() > 64 {
308 continue;
309 }
310
311 let tag_trimmed = tag.trim_end_matches(|c: char| c.is_ascii_punctuation());
313 if tag_trimmed.is_empty() {
314 continue;
315 }
316
317 let full_match = cap.get(0).unwrap();
319 let hash_pos = full_match
321 .as_str()
322 .find('#')
323 .or_else(|| full_match.as_str().find('#'))
324 .unwrap_or(0);
325 let byte_start = full_match.start() + hash_pos;
326 let byte_end = byte_start + 1 + tag_trimmed.len(); let byte_end = byte_end.min(text.len());
330
331 facets.push(Facet {
332 index: ByteSlice {
333 byte_start,
334 byte_end,
335 },
336 features: vec![FacetFeature::Tag {
337 tag: tag_trimmed.to_string(),
338 }],
339 });
340 }
341
342 facets.sort_by_key(|f| f.index.byte_start);
343 facets
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `RichText` from `text` and runs facet detection on it.
    fn detected(text: &str) -> RichText {
        let mut rich = RichText::new(text, None);
        rich.detect_facets();
        rich
    }

    /// Asserts that `rich` has exactly one facet and returns it.
    fn only_facet(rich: &RichText) -> &Facet {
        assert_eq!(rich.facets().len(), 1);
        &rich.facets()[0]
    }

    /// A facet over `byte_start..byte_end` carrying a single feature.
    fn facet(byte_start: usize, byte_end: usize, feature: FacetFeature) -> Facet {
        Facet {
            index: ByteSlice {
                byte_start,
                byte_end,
            },
            features: vec![feature],
        }
    }

    /// The `#test` tag feature shared by the editing tests.
    fn test_tag() -> FacetFeature {
        FacetFeature::Tag {
            tag: "test".to_string(),
        }
    }

    /// The text of `rich` covered by `facet`.
    fn covered<'a>(rich: &'a RichText, facet: &Facet) -> &'a str {
        &rich.text()[facet.index.byte_start..facet.index.byte_end]
    }

    #[test]
    fn basic_text_no_facets() {
        let rich = RichText::new("Hello, world!", None);
        assert_eq!(rich.text(), "Hello, world!");
        assert!(rich.facets().is_empty());
        assert_eq!(rich.len(), 13);
        assert_eq!(rich.grapheme_length(), 13);
    }

    #[test]
    fn detect_mention() {
        let rich = detected("Hello @alice.bsky.social!");
        let found = only_facet(&rich);
        match &found.features[0] {
            FacetFeature::Mention { did } => assert_eq!(did.as_str(), "alice.bsky.social"),
            other => panic!("expected a mention, got {other:?}"),
        }
        assert_eq!(covered(&rich, found), "@alice.bsky.social");
    }

    #[test]
    fn detect_url() {
        let rich = detected("Check https://example.com/path here");
        match &only_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com/path"),
            other => panic!("expected a link, got {other:?}"),
        }
    }

    #[test]
    fn detect_url_strips_trailing_punctuation() {
        let rich = detected("Visit https://example.com.");
        match &only_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com"),
            other => panic!("expected a link, got {other:?}"),
        }
    }

    #[test]
    fn detect_url_strips_trailing_paren_without_open() {
        let rich = detected("(see https://example.com/page)");
        match &only_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com/page"),
            other => panic!("expected a link, got {other:?}"),
        }
    }

    #[test]
    fn detect_hashtag() {
        let rich = detected("Hello #atproto world");
        match &only_facet(&rich).features[0] {
            FacetFeature::Tag { tag } => assert_eq!(tag.as_str(), "atproto"),
            other => panic!("expected a tag, got {other:?}"),
        }
    }

    #[test]
    fn detect_multiple_facets() {
        let rich = detected("@alice.test posted https://example.com #cool");
        let [mention, link, tag] = rich.facets() else {
            panic!("expected exactly three facets, got {}", rich.facets().len());
        };
        assert!(mention
            .features
            .iter()
            .any(|feature| matches!(feature, FacetFeature::Mention { .. })));
        assert!(link
            .features
            .iter()
            .any(|feature| matches!(feature, FacetFeature::Link { .. })));
        assert!(tag
            .features
            .iter()
            .any(|feature| matches!(feature, FacetFeature::Tag { .. })));
    }

    #[test]
    fn segments_no_facets() {
        let pieces = RichText::new("Hello world", None).segments();
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].text, "Hello world");
        assert!(pieces[0].facet.is_none());
    }

    #[test]
    fn segments_with_facets() {
        let pieces = detected("Hello @alice.test world").segments();
        assert_eq!(pieces.len(), 3);

        assert_eq!(pieces[0].text, "Hello ");
        assert!(pieces[0].facet.is_none());

        assert_eq!(pieces[1].text, "@alice.test");
        assert!(pieces[1].is_mention());

        assert_eq!(pieces[2].text, " world");
        assert!(pieces[2].facet.is_none());
    }

    #[test]
    fn insert_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![facet(6, 11, test_tag())]));
        rich.insert(0, "Hey ");
        assert_eq!(rich.text(), "Hey Hello #test");

        let span = &rich.facets()[0].index;
        assert_eq!(span.byte_start, 10);
        assert_eq!(span.byte_end, 15);
    }

    #[test]
    fn insert_inside_facet() {
        let link = FacetFeature::Link {
            uri: "https://example.com".to_string(),
        };
        let mut rich = RichText::new("Hello world", Some(vec![facet(0, 5, link)]));
        rich.insert(3, "XX");
        assert_eq!(rich.text(), "HelXXlo world");

        let span = &rich.facets()[0].index;
        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 7);
    }

    #[test]
    fn delete_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![facet(6, 11, test_tag())]));
        rich.delete(0, 6);
        assert_eq!(rich.text(), "#test");

        let span = &rich.facets()[0].index;
        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 5);
    }

    #[test]
    fn delete_spanning_facet_removes_it() {
        let mut rich = RichText::new("Hello #test world", Some(vec![facet(6, 11, test_tag())]));
        rich.delete(5, 12);
        assert_eq!(rich.text(), "Helloworld");
        assert!(rich.facets().is_empty());
    }

    #[test]
    fn grapheme_length_emoji() {
        let rich = RichText::new("Hi 👋🏽", None);
        // The emoji plus its modifier is one grapheme but several bytes.
        assert_eq!(rich.grapheme_length(), 4);
        assert!(rich.len() > 4);
    }

    #[test]
    fn utf8_byte_offsets_work_natively() {
        // The multi-byte character before the mention shifts its byte offsets.
        let rich = detected("Héllo @alice.test");
        let found = only_facet(&rich);
        assert_eq!(covered(&rich, found), "@alice.test");
    }

    #[test]
    fn empty_text() {
        let rich = RichText::new("", None);
        assert!(rich.is_empty());
        assert_eq!(rich.len(), 0);
        assert_eq!(rich.grapheme_length(), 0);

        let pieces = rich.segments();
        assert_eq!(pieces.len(), 1);
        assert_eq!(pieces[0].text, "");
    }

    #[test]
    fn facet_feature_serde_roundtrip() {
        let original = facet(
            0,
            5,
            FacetFeature::Mention {
                did: "did:plc:abc123".to_string(),
            },
        );

        let json = serde_json::to_string(&original).unwrap();
        assert!(json.contains("app.bsky.richtext.facet#mention"));

        let parsed: Facet = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.index.byte_start, 0);
        match &parsed.features[0] {
            FacetFeature::Mention { did } => assert_eq!(did.as_str(), "did:plc:abc123"),
            other => panic!("expected a mention, got {other:?}"),
        }
    }
}