use regex::Regex;
use std::sync::LazyLock;
use unicode_segmentation::UnicodeSegmentation;
/// An annotation attached to a byte range of a rich-text string.
///
/// Field names are serialized in camelCase.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Facet {
    /// The byte range of the text this facet applies to.
    pub index: ByteSlice,
    /// The features (mention, link, tag) carried by that range.
    pub features: Vec<FacetFeature>,
}
/// A range of byte offsets into a rich-text string.
///
/// Serialized with camelCase keys (`byteStart`, `byteEnd`).
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ByteSlice {
    /// Offset of the first byte in the range.
    pub byte_start: usize,
    /// Offset at which the range ends.
    pub byte_end: usize,
}
/// A single feature carried by a [`Facet`].
///
/// Serialized as an internally tagged value: the `$type` key holds the variant's
/// renamed identifier.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(tag = "$type")]
pub enum FacetFeature {
    /// A mention of another account.
    #[serde(rename = "app.bsky.richtext.facet#mention")]
    Mention {
        /// Identifier of the mentioned account.
        did: String,
    },
    /// A hyperlink.
    #[serde(rename = "app.bsky.richtext.facet#link")]
    Link {
        /// The link target.
        uri: String,
    },
    /// A hashtag.
    #[serde(rename = "app.bsky.richtext.facet#tag")]
    Tag {
        /// The tag text.
        tag: String,
    },
}
/// A contiguous piece of rich text together with the facet that decorates it, if any.
#[derive(Clone, Debug)]
pub struct RichTextSegment {
    /// The text of this segment.
    pub text: String,
    /// The facet attached to this segment; `None` for undecorated text.
    pub facet: Option<Facet>,
}
impl RichTextSegment {
pub fn is_mention(&self) -> bool {
self.facet.as_ref().is_some_and(|f| {
f.features
.iter()
.any(|feat| matches!(feat, FacetFeature::Mention { .. }))
})
}
pub fn is_link(&self) -> bool {
self.facet.as_ref().is_some_and(|f| {
f.features
.iter()
.any(|feat| matches!(feat, FacetFeature::Link { .. }))
})
}
pub fn is_tag(&self) -> bool {
self.facet.as_ref().is_some_and(|f| {
f.features
.iter()
.any(|feat| matches!(feat, FacetFeature::Tag { .. }))
})
}
}
/// A string together with the facets that annotate byte ranges of it.
#[derive(Clone, Debug)]
pub struct RichText {
    /// The underlying text.
    text: String,
    /// The facets annotating `text`.
    facets: Vec<Facet>,
}
impl RichText {
    /// Creates rich text from `text` and an optional list of facets.
    ///
    /// Facets whose range is empty or inverted (`byte_start >= byte_end`) are dropped and the
    /// rest are stored sorted by `byte_start`. Facet ranges are not validated against `text`
    /// here; [`RichText::segments`] skips any facet that does not fit the text.
    pub fn new(text: impl Into<String>, facets: Option<Vec<Facet>>) -> Self {
        let text = text.into();
        let mut facets = facets.unwrap_or_default();
        facets.retain(|f| f.index.byte_start < f.index.byte_end);
        facets.sort_by_key(|f| f.index.byte_start);
        RichText { text, facets }
    }

    /// Returns the underlying text.
    pub fn text(&self) -> &str {
        &self.text
    }

    /// Returns the facets currently attached to the text.
    pub fn facets(&self) -> &[Facet] {
        &self.facets
    }

    /// Returns the length of the text in bytes.
    pub fn len(&self) -> usize {
        self.text.len()
    }

    /// Returns `true` if the text is empty.
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }

    /// Returns the number of extended grapheme clusters in the text.
    pub fn grapheme_length(&self) -> usize {
        self.text.graphemes(true).count()
    }

    /// Replaces the current facets with the ones detected in the text by [`detect_facets`].
    pub fn detect_facets(&mut self) {
        self.facets = detect_facets(&self.text);
    }

    /// Inserts `insert_text` at byte offset `index` and adjusts the facets.
    ///
    /// Facets starting at or after `index` are shifted right by the inserted length; a facet
    /// that strictly contains `index` grows by that length; facets ending at or before `index`
    /// are left untouched.
    ///
    /// # Panics
    ///
    /// Panics if `index` is greater than the text length or is not on a `char` boundary.
    pub fn insert(&mut self, index: usize, insert_text: &str) {
        let added = insert_text.len();
        self.text.insert_str(index, insert_text);
        for facet in &mut self.facets {
            if index <= facet.index.byte_start {
                facet.index.byte_start += added;
                facet.index.byte_end += added;
            } else if index < facet.index.byte_end {
                facet.index.byte_end += added;
            }
        }
    }

    /// Deletes the bytes in `start..end` from the text and adjusts the facets.
    ///
    /// Facets after the deleted range are shifted left, facets partially covered by it are
    /// shrunk, and facets left without any text are removed.
    ///
    /// # Panics
    ///
    /// Panics if `start > end`, if `end` is greater than the text length, or if either offset
    /// is not on a `char` boundary.
    pub fn delete(&mut self, start: usize, end: usize) {
        // Edit the text first: an invalid range panics here, before any facet is modified and
        // before the subtraction below could overflow.
        self.text.replace_range(start..end, "");
        let removed = end - start;

        // Maps an offset in the old text to its position in the new text: offsets up to the
        // deleted range stay put, offsets inside it collapse onto `start`, and offsets past
        // it move left by the deleted length.
        let remap = |offset: usize| {
            if offset <= start {
                offset
            } else if offset >= end {
                offset - removed
            } else {
                start
            }
        };

        for facet in &mut self.facets {
            facet.index.byte_start = remap(facet.index.byte_start);
            facet.index.byte_end = remap(facet.index.byte_end);
        }
        // A facet whose whole range was deleted has collapsed to an empty range.
        self.facets
            .retain(|f| f.index.byte_start < f.index.byte_end);
    }

    /// Splits the text into consecutive segments, attaching each facet to the text it covers.
    ///
    /// The segment texts always concatenate back to the full text, and at least one segment
    /// is returned (a single empty one for empty text). A facet covering only whitespace
    /// yields an undecorated segment. Facets that cannot be applied are skipped and their
    /// text is emitted undecorated: facets that start inside an earlier facet, facets that
    /// start at or beyond the end of the text, and facets whose bounds are not on `char`
    /// boundaries. A facet that merely runs past the end of the text is clamped to it.
    pub fn segments(&self) -> Vec<RichTextSegment> {
        let plain = |text: &str| RichTextSegment {
            text: text.to_string(),
            facet: None,
        };

        if self.facets.is_empty() {
            return vec![plain(self.text.as_str())];
        }

        let text_len = self.text.len();
        let mut segments = Vec::new();
        let mut cursor = 0;
        for facet in &self.facets {
            let start = facet.index.byte_start;
            let end = facet.index.byte_end.min(text_len);
            // Overlaps already emitted text, or nothing is left of it after clamping.
            if start < cursor || start >= end {
                continue;
            }
            // `get` returns `None` when a bound is not on a char boundary; slicing would panic.
            let Some(covered) = self.text.get(start..end) else {
                continue;
            };
            if cursor < start {
                // `cursor` and `start` are both known char boundaries at this point.
                segments.push(plain(&self.text[cursor..start]));
            }
            let facet = if covered.trim().is_empty() {
                None
            } else {
                Some(facet.clone())
            };
            segments.push(RichTextSegment {
                text: covered.to_string(),
                facet,
            });
            cursor = end;
        }
        if cursor < text_len {
            segments.push(plain(&self.text[cursor..]));
        }
        if segments.is_empty() {
            // Only reachable for empty text; mirror the facet-less case.
            segments.push(plain(self.text.as_str()));
        }
        segments
    }
}
/// Matches an `@handle` mention that appears at the start of the text or right after
/// whitespace or an opening parenthesis.
///
/// Capture group 1 is the `@` sign and capture group 2 is the handle that follows it.
static MENTION_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:^|\s|\()(@)([a-zA-Z0-9]([a-zA-Z0-9.-]*[a-zA-Z0-9])?\.[a-zA-Z]{2,})";
    Regex::new(pattern).expect("mention regex")
});
/// Matches an `http://` or `https://` URL that appears at the start of the text or right
/// after whitespace or an opening parenthesis.
///
/// Capture group 1 is the URL: the scheme followed by every non-whitespace character up to
/// the next whitespace, so it may still carry trailing punctuation.
static URL_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:^|\s|\()(https?://[\S]+)";
    Regex::new(pattern).expect("url regex")
});
/// Matches a hashtag that appears at the start of the text or right after whitespace.
///
/// Capture group 1 is the tag text after the hash sign. It may not contain whitespace or the
/// listed invisible code points (U+00AD, U+2060, U+200A..U+200D) and must contain at least
/// one character that is neither a digit nor punctuation.
///
/// NOTE(review): the class `[##]` lists the same character twice; presumably one of the two
/// was meant to be a different hash-like character. Confirm against the upstream pattern.
static TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:^|\s)[##]([^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*[^\d\s\p{Punctuation}\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]+[^\s\u{00AD}\u{2060}\u{200A}\u{200B}\u{200C}\u{200D}]*)";
    Regex::new(pattern).expect("tag regex")
});
/// Scans `text` for mentions, links and hashtags and returns one facet per match, sorted by
/// starting byte offset.
///
/// * Mentions cover the `@` sign and the handle. The handle text (without the `@`) is stored
///   in the `did` field as written in the text; this function does not resolve it.
/// * Links have trailing `.,;:!?` characters removed, and then one trailing `)` when the URL
///   contains no `(`. A match with nothing left after the `://` is ignored.
/// * Tags cover the hash sign and the tag text with trailing ASCII punctuation removed. Tags
///   that are empty after trimming, longer than 64 graphemes, or longer than 640 bytes are
///   ignored.
pub fn detect_facets(text: &str) -> Vec<Facet> {
    let mut facets = Vec::new();

    for cap in MENTION_RE.captures_iter(text) {
        let (Some(at_sign), Some(handle)) = (cap.get(1), cap.get(2)) else {
            continue;
        };
        facets.push(Facet {
            index: ByteSlice {
                byte_start: at_sign.start(),
                byte_end: handle.end(),
            },
            features: vec![FacetFeature::Mention {
                did: handle.as_str().to_string(),
            }],
        });
    }

    for cap in URL_RE.captures_iter(text) {
        let Some(url) = cap.get(1) else {
            continue;
        };
        // Trim on a slice of the match so the byte range follows from the trimmed length.
        let mut uri = url
            .as_str()
            .trim_end_matches(['.', ',', ';', ':', '!', '?']);
        // A closing parenthesis with no opening one belongs to the surrounding prose.
        if !uri.contains('(') {
            if let Some(without_paren) = uri.strip_suffix(')') {
                uri = without_paren;
            }
        }
        // Nothing but the scheme is left (e.g. "https://..."): not a usable link.
        if uri.ends_with("://") {
            continue;
        }
        facets.push(Facet {
            index: ByteSlice {
                byte_start: url.start(),
                byte_end: url.start() + uri.len(),
            },
            features: vec![FacetFeature::Link {
                uri: uri.to_string(),
            }],
        });
    }

    for cap in TAG_RE.captures_iter(text) {
        let Some(raw_tag) = cap.get(1) else {
            continue;
        };
        let tag = raw_tag
            .as_str()
            .trim_end_matches(|c: char| c.is_ascii_punctuation());
        // Limits apply to the trimmed tag and are counted in graphemes (with a byte cap), so
        // multi-byte tags are not rejected early.
        if tag.is_empty() || tag.len() > 640 || tag.graphemes(true).count() > 64 {
            continue;
        }
        // The hash sign is the character right before the captured tag; locating it this way
        // does not depend on how many bytes it occupies.
        let Some((byte_start, _)) = text[..raw_tag.start()].char_indices().next_back() else {
            continue;
        };
        facets.push(Facet {
            index: ByteSlice {
                byte_start,
                byte_end: raw_tag.start() + tag.len(),
            },
            features: vec![FacetFeature::Tag {
                tag: tag.to_string(),
            }],
        });
    }

    facets.sort_by_key(|f| f.index.byte_start);
    facets
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `RichText` without facets and runs facet detection on it.
    fn detected(text: &str) -> RichText {
        let mut rich = RichText::new(text, None);
        rich.detect_facets();
        rich
    }

    /// Asserts that `rich` has exactly one facet and returns it.
    fn sole_facet(rich: &RichText) -> &Facet {
        let facets = rich.facets();
        assert_eq!(facets.len(), 1);
        &facets[0]
    }

    /// Returns the slice of the text covered by `facet`.
    fn covered_text<'a>(rich: &'a RichText, facet: &Facet) -> &'a str {
        &rich.text()[facet.index.byte_start..facet.index.byte_end]
    }

    /// Returns `true` if any feature of `facet` satisfies `predicate`.
    fn has_feature(facet: &Facet, predicate: impl Fn(&FacetFeature) -> bool) -> bool {
        facet.features.iter().any(predicate)
    }

    /// Builds a facet with a single feature covering `byte_start..byte_end`.
    fn facet_at(byte_start: usize, byte_end: usize, feature: FacetFeature) -> Facet {
        Facet {
            index: ByteSlice {
                byte_start,
                byte_end,
            },
            features: vec![feature],
        }
    }

    /// Builds a `test` tag facet covering `byte_start..byte_end`.
    fn test_tag_at(byte_start: usize, byte_end: usize) -> Facet {
        facet_at(
            byte_start,
            byte_end,
            FacetFeature::Tag {
                tag: "test".to_string(),
            },
        )
    }

    #[test]
    fn basic_text_no_facets() {
        let rich = RichText::new("Hello, world!", None);
        assert_eq!(rich.text(), "Hello, world!");
        assert!(rich.facets().is_empty());
        assert_eq!(rich.len(), 13);
        assert_eq!(rich.grapheme_length(), 13);
    }

    #[test]
    fn detect_mention() {
        let rich = detected("Hello @alice.bsky.social!");
        let facet = sole_facet(&rich);
        match &facet.features[0] {
            FacetFeature::Mention { did } => assert_eq!(did.as_str(), "alice.bsky.social"),
            other => panic!("expected a mention, got {:?}", other),
        }
        assert_eq!(covered_text(&rich, facet), "@alice.bsky.social");
    }

    #[test]
    fn detect_url() {
        let rich = detected("Check https://example.com/path here");
        match &sole_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com/path"),
            other => panic!("expected a link, got {:?}", other),
        }
    }

    #[test]
    fn detect_url_strips_trailing_punctuation() {
        let rich = detected("Visit https://example.com.");
        match &sole_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com"),
            other => panic!("expected a link, got {:?}", other),
        }
    }

    #[test]
    fn detect_url_strips_trailing_paren_without_open() {
        let rich = detected("(see https://example.com/page)");
        match &sole_facet(&rich).features[0] {
            FacetFeature::Link { uri } => assert_eq!(uri.as_str(), "https://example.com/page"),
            other => panic!("expected a link, got {:?}", other),
        }
    }

    #[test]
    fn detect_hashtag() {
        let rich = detected("Hello #atproto world");
        match &sole_facet(&rich).features[0] {
            FacetFeature::Tag { tag } => assert_eq!(tag.as_str(), "atproto"),
            other => panic!("expected a tag, got {:?}", other),
        }
    }

    #[test]
    fn detect_multiple_facets() {
        let rich = detected("@alice.test posted https://example.com #cool");
        let facets = rich.facets();
        assert_eq!(facets.len(), 3);
        assert!(has_feature(&facets[0], |f| matches!(
            f,
            FacetFeature::Mention { .. }
        )));
        assert!(has_feature(&facets[1], |f| matches!(
            f,
            FacetFeature::Link { .. }
        )));
        assert!(has_feature(&facets[2], |f| matches!(
            f,
            FacetFeature::Tag { .. }
        )));
    }

    #[test]
    fn segments_no_facets() {
        let segments = RichText::new("Hello world", None).segments();
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].text, "Hello world");
        assert!(segments[0].facet.is_none());
    }

    #[test]
    fn segments_with_facets() {
        let segments = detected("Hello @alice.test world").segments();
        assert_eq!(segments.len(), 3);

        let (before, mention, after) = (&segments[0], &segments[1], &segments[2]);
        assert_eq!(before.text, "Hello ");
        assert!(before.facet.is_none());
        assert_eq!(mention.text, "@alice.test");
        assert!(mention.is_mention());
        assert_eq!(after.text, " world");
        assert!(after.facet.is_none());
    }

    #[test]
    fn insert_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![test_tag_at(6, 11)]));
        rich.insert(0, "Hey ");
        assert_eq!(rich.text(), "Hey Hello #test");

        let index = &rich.facets()[0].index;
        assert_eq!(index.byte_start, 10);
        assert_eq!(index.byte_end, 15);
    }

    #[test]
    fn insert_inside_facet() {
        let link = facet_at(
            0,
            5,
            FacetFeature::Link {
                uri: "https://example.com".to_string(),
            },
        );
        let mut rich = RichText::new("Hello world", Some(vec![link]));
        rich.insert(3, "XX");
        assert_eq!(rich.text(), "HelXXlo world");

        let index = &rich.facets()[0].index;
        assert_eq!(index.byte_start, 0);
        assert_eq!(index.byte_end, 7);
    }

    #[test]
    fn delete_before_facet() {
        let mut rich = RichText::new("Hello #test", Some(vec![test_tag_at(6, 11)]));
        rich.delete(0, 6);
        assert_eq!(rich.text(), "#test");

        let index = &rich.facets()[0].index;
        assert_eq!(index.byte_start, 0);
        assert_eq!(index.byte_end, 5);
    }

    #[test]
    fn delete_spanning_facet_removes_it() {
        let mut rich = RichText::new("Hello #test world", Some(vec![test_tag_at(6, 11)]));
        rich.delete(5, 12);
        assert_eq!(rich.text(), "Helloworld");
        assert!(rich.facets().is_empty());
    }

    #[test]
    fn grapheme_length_emoji() {
        let rich = RichText::new("Hi 👋🏽", None);
        assert_eq!(rich.grapheme_length(), 4);
        assert!(rich.len() > 4);
    }

    #[test]
    fn utf8_byte_offsets_work_natively() {
        let rich = detected("Héllo @alice.test");
        let facet = sole_facet(&rich);
        assert_eq!(covered_text(&rich, facet), "@alice.test");
    }

    #[test]
    fn empty_text() {
        let rich = RichText::new("", None);
        assert!(rich.is_empty());
        assert_eq!(rich.len(), 0);
        assert_eq!(rich.grapheme_length(), 0);

        let segments = rich.segments();
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].text, "");
    }

    #[test]
    fn facet_feature_serde_roundtrip() {
        let original = facet_at(
            0,
            5,
            FacetFeature::Mention {
                did: "did:plc:abc123".to_string(),
            },
        );
        let json = serde_json::to_string(&original).unwrap();
        assert!(json.contains("app.bsky.richtext.facet#mention"));

        let parsed: Facet = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.index.byte_start, 0);
        match &parsed.features[0] {
            FacetFeature::Mention { did } => assert_eq!(did.as_str(), "did:plc:abc123"),
            other => panic!("expected a mention, got {:?}", other),
        }
    }
}