1use fancy_regex::Regex;
5use serde::{Deserialize, Serialize};
6use serde_json::{Value, json};
7use std::sync::LazyLock;
8
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
11#[serde(rename_all = "camelCase")]
12pub enum TextFormattingStyle {
13 Bold,
14 Italic,
15 Underline,
16 Strikethrough,
17}
18
19impl TextFormattingStyle {
20 fn as_str(&self) -> &'static str {
21 match self {
22 Self::Bold => "bold",
23 Self::Italic => "italic",
24 Self::Underline => "underline",
25 Self::Strikethrough => "strikethrough",
26 }
27 }
28
29 fn from_str(s: &str) -> Option<Self> {
30 match s {
31 "bold" => Some(Self::Bold),
32 "italic" => Some(Self::Italic),
33 "underline" => Some(Self::Underline),
34 "strikethrough" => Some(Self::Strikethrough),
35 _ => None,
36 }
37 }
38}
39
40#[derive(Debug, Clone, PartialEq, Eq)]
42pub struct TextFormattingRange {
43 pub start: usize,
44 pub length: usize,
45 pub styles: Vec<TextFormattingStyle>,
46}
47
48impl TextFormattingRange {
49 pub fn to_json(&self) -> Value {
50 json!({
51 "start": self.start,
52 "length": self.length,
53 "styles": self.styles.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
54 })
55 }
56}
57
58pub struct ParsedFormatting {
60 pub clean_text: String,
61 pub formatting: Vec<TextFormattingRange>,
62}
63
64impl ParsedFormatting {
65 pub fn formatting_json(&self) -> Value {
67 json!(
68 self.formatting
69 .iter()
70 .map(|r| r.to_json())
71 .collect::<Vec<_>>()
72 )
73 }
74}
75
76pub fn validate_text_formatting(formatting: &Value, message: &str) -> Result<(), String> {
78 let arr = formatting
79 .as_array()
80 .ok_or("textFormatting must be an array")?;
81
82 if message.is_empty() {
83 return Err("A non-empty 'message' is required when using textFormatting".to_string());
84 }
85
86 let msg_len = message.len();
87 for (i, range) in arr.iter().enumerate() {
88 let obj = range
89 .as_object()
90 .ok_or(format!("textFormatting[{i}] must be an object"))?;
91
92 let start = obj
93 .get("start")
94 .and_then(|v| v.as_u64())
95 .ok_or(format!("textFormatting[{i}].start must be an integer >= 0"))?
96 as usize;
97
98 let length = obj
99 .get("length")
100 .and_then(|v| v.as_u64())
101 .filter(|&v| v > 0)
102 .ok_or(format!("textFormatting[{i}].length must be an integer > 0"))?
103 as usize;
104
105 if start + length > msg_len {
106 return Err(format!("textFormatting[{i}] range exceeds message length"));
107 }
108
109 let styles = obj
110 .get("styles")
111 .and_then(|v| v.as_array())
112 .filter(|a| !a.is_empty())
113 .ok_or(format!(
114 "textFormatting[{i}].styles must be a non-empty array"
115 ))?;
116
117 for style_val in styles {
118 let s = style_val.as_str().ok_or(format!(
119 "textFormatting[{i}].styles contains non-string value"
120 ))?;
121 if TextFormattingStyle::from_str(s).is_none() {
122 return Err(format!(
123 "textFormatting[{i}].styles contains unsupported value: {s}"
124 ));
125 }
126 }
127 }
128
129 Ok(())
130}
131
132pub fn has_text_formatting(formatting: Option<&Value>) -> bool {
134 formatting
135 .and_then(|v| v.as_array())
136 .map(|a| !a.is_empty())
137 .unwrap_or(false)
138}
139
/// First private-use code point used as a placeholder for protected regions
/// (code and URLs); region `i` is masked with `PUA_PROTECT + i`.
const PUA_PROTECT: char = '\u{E000}';
/// First private-use code point used as a placeholder for backslash-escaped
/// characters; escape `i` is masked with `PUA_ESCAPE + i`.
const PUA_ESCAPE: char = '\u{F000}';
143
144struct EmphasisPattern {
146 regex: &'static LazyLock<Regex>,
147 styles: Vec<TextFormattingStyle>,
148}
149
150static RE_FENCED_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"```[\s\S]*?```").unwrap());
153static RE_INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`]+`").unwrap());
154static RE_URL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://[^\s)>\]]+").unwrap());
155static RE_BACKSLASH_ESCAPE: LazyLock<Regex> =
156 LazyLock::new(|| Regex::new(r#"\\([\\`*_~\{}\[\]<>()\#+\-.!|])"#).unwrap());
157
158static RE_BOLD_ITALIC_STAR: LazyLock<Regex> =
160 LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap());
161static RE_BOLD_ITALIC_UNDER: LazyLock<Regex> =
162 LazyLock::new(|| Regex::new(r"(?<!\w)___(.+?)___(?!\w)").unwrap());
163static RE_BOLD_STAR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
164static RE_BOLD_UNDER: LazyLock<Regex> =
165 LazyLock::new(|| Regex::new(r"(?<!\w)__(.+?)__(?!\w)").unwrap());
166static RE_ITALIC_STAR: LazyLock<Regex> =
167 LazyLock::new(|| Regex::new(r"(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)").unwrap());
168static RE_ITALIC_UNDER: LazyLock<Regex> =
169 LazyLock::new(|| Regex::new(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)").unwrap());
170static RE_STRIKETHROUGH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
171
172fn find_all_matches(re: &Regex, text: &str) -> Vec<(usize, usize, String)> {
174 let mut results = Vec::new();
175 let mut start = 0;
176 while start < text.len() {
177 match re.find_from_pos(text, start) {
178 Ok(Some(m)) => {
179 results.push((m.start(), m.end(), m.as_str().to_string()));
180 start = m.end();
181 }
182 _ => break,
183 }
184 }
185 results
186}
187
188fn captures_all(re: &Regex, text: &str) -> Vec<(usize, usize, String, String)> {
190 let mut results = Vec::new();
192 let mut start = 0;
193 while start < text.len() {
194 match re.captures_from_pos(text, start) {
195 Ok(Some(caps)) => {
196 let full = caps.get(0).unwrap();
197 let group1 = caps
198 .get(1)
199 .map(|m| m.as_str().to_string())
200 .unwrap_or_default();
201 results.push((full.start(), full.end(), full.as_str().to_string(), group1));
202 start = full.end();
203 }
204 _ => break,
205 }
206 }
207 results
208}
209
210pub fn parse_markdown_formatting(text: &str) -> Option<ParsedFormatting> {
219 if text.is_empty() {
220 return None;
221 }
222
223 let mut protected_regions: Vec<String> = Vec::new();
225 let mut work = text.to_string();
226
227 let protect = |work: &mut String, regions: &mut Vec<String>, re: &Regex| {
228 let matches = find_all_matches(re, work);
229 if matches.is_empty() {
230 return;
231 }
232 let mut result = String::new();
233 let mut last = 0;
234 for (mstart, mend, mtext) in &matches {
235 result.push_str(&work[last..*mstart]);
236 let idx = regions.len();
237 regions.push(mtext.clone());
238 let pua = char::from_u32(PUA_PROTECT as u32 + idx as u32).unwrap_or(PUA_PROTECT);
239 for _ in 0..mtext.len() {
240 result.push(pua);
241 }
242 last = *mend;
243 }
244 result.push_str(&work[last..]);
245 *work = result;
246 };
247
248 protect(&mut work, &mut protected_regions, &RE_FENCED_CODE);
249 protect(&mut work, &mut protected_regions, &RE_INLINE_CODE);
250 protect(&mut work, &mut protected_regions, &RE_URL);
251
252 let mut escaped_chars: Vec<String> = Vec::new();
254 {
255 let caps = captures_all(&RE_BACKSLASH_ESCAPE, &work);
256 if !caps.is_empty() {
257 let mut result = String::new();
258 let mut last = 0;
259 for (fstart, fend, _, group1) in &caps {
260 result.push_str(&work[last..*fstart]);
261 let idx = escaped_chars.len();
262 escaped_chars.push(group1.clone());
263 let pua = char::from_u32(PUA_ESCAPE as u32 + idx as u32).unwrap_or(PUA_ESCAPE);
264 result.push(pua);
265 last = *fend;
266 }
267 result.push_str(&work[last..]);
268 work = result;
269 }
270 }
271
272 let mut formatting: Vec<TextFormattingRange> = Vec::new();
274
275 let patterns = [
276 EmphasisPattern {
277 regex: &RE_BOLD_ITALIC_STAR,
278 styles: vec![TextFormattingStyle::Bold, TextFormattingStyle::Italic],
279 },
280 EmphasisPattern {
281 regex: &RE_BOLD_ITALIC_UNDER,
282 styles: vec![TextFormattingStyle::Bold, TextFormattingStyle::Italic],
283 },
284 EmphasisPattern {
285 regex: &RE_BOLD_STAR,
286 styles: vec![TextFormattingStyle::Bold],
287 },
288 EmphasisPattern {
289 regex: &RE_BOLD_UNDER,
290 styles: vec![TextFormattingStyle::Bold],
291 },
292 EmphasisPattern {
293 regex: &RE_ITALIC_STAR,
294 styles: vec![TextFormattingStyle::Italic],
295 },
296 EmphasisPattern {
297 regex: &RE_ITALIC_UNDER,
298 styles: vec![TextFormattingStyle::Italic],
299 },
300 EmphasisPattern {
301 regex: &RE_STRIKETHROUGH,
302 styles: vec![TextFormattingStyle::Strikethrough],
303 },
304 ];
305
306 for pattern in &patterns {
307 let caps_list = captures_all(pattern.regex, &work);
308 if caps_list.is_empty() {
309 continue;
310 }
311
312 let mut pass_ranges: Vec<TextFormattingRange> = Vec::new();
313 let mut removed_positions: Vec<usize> = Vec::new();
314
315 let mut result = String::new();
316 let mut last = 0;
317
318 for (fstart, fend, full_text, content) in &caps_list {
319 result.push_str(&work[last..*fstart]);
320
321 let marker_len = (full_text.len() - content.len()) / 2;
322
323 for j in 0..marker_len {
325 removed_positions.push(fstart + j);
326 }
327 for j in 0..marker_len {
328 removed_positions.push(fstart + marker_len + content.len() + j);
329 }
330
331 let start = result.len();
332 result.push_str(content);
333 pass_ranges.push(TextFormattingRange {
334 start,
335 length: content.len(),
336 styles: pattern.styles.clone(),
337 });
338
339 last = *fend;
340 }
341
342 result.push_str(&work[last..]);
343 work = result;
344
345 if !removed_positions.is_empty() {
347 removed_positions.sort();
348 for range in &mut formatting {
349 let mut start_shift = 0usize;
350 let mut length_reduction = 0usize;
351 for &pos in &removed_positions {
352 if pos < range.start {
353 start_shift += 1;
354 } else if pos < range.start + range.length {
355 length_reduction += 1;
356 }
357 }
358 range.start -= start_shift;
359 range.length -= length_reduction;
360 }
361 }
362
363 formatting.extend(pass_ranges);
364 }
365
366 let mut clean_text = work;
368
369 for i in (0..protected_regions.len()).rev() {
371 let pua = char::from_u32(PUA_PROTECT as u32 + i as u32).unwrap_or(PUA_PROTECT);
372 let pua_run: String = std::iter::repeat_n(pua, protected_regions[i].len()).collect();
373 clean_text = clean_text.replace(&pua_run, &protected_regions[i]);
374 }
375
376 for (i, escaped) in escaped_chars.iter().enumerate() {
378 let pua = char::from_u32(PUA_ESCAPE as u32 + i as u32).unwrap_or(PUA_ESCAPE);
379 clean_text = clean_text.replace(pua, escaped);
380 }
381
382 if formatting.is_empty() && clean_text == text {
384 return None;
385 }
386
387 formatting.retain(|r| r.length > 0);
389 formatting.sort_by_key(|r| r.start);
390
391 Some(ParsedFormatting {
392 clean_text,
393 formatting,
394 })
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected range for comparisons.
    fn range(start: usize, length: usize, styles: &[TextFormattingStyle]) -> TextFormattingRange {
        TextFormattingRange {
            start,
            length,
            styles: styles.to_vec(),
        }
    }

    /// Asserts that `input` parses to `clean` with exactly one range that
    /// starts at 0, covers all of `clean`, and carries `styles`.
    fn assert_single_range(input: &str, clean: &str, styles: &[TextFormattingStyle]) {
        let parsed = parse_markdown_formatting(input).unwrap();
        assert_eq!(parsed.clean_text, clean);
        assert_eq!(parsed.formatting, vec![range(0, clean.len(), styles)]);
    }

    /// Asserts that `input` is reported as having nothing to parse.
    fn assert_untouched(input: &str) {
        assert!(parse_markdown_formatting(input).is_none());
    }

    /// Asserts that `formatting` is rejected for `message`.
    fn assert_rejected(formatting: Value, message: &str) {
        assert!(validate_text_formatting(&formatting, message).is_err());
    }

    #[test]
    fn null_empty_input() {
        assert_untouched("");
    }

    #[test]
    fn plain_text_unchanged() {
        assert_untouched("hello world");
    }

    #[test]
    fn bold_stars() {
        assert_single_range("**bold**", "bold", &[TextFormattingStyle::Bold]);
    }

    #[test]
    fn italic_stars() {
        assert_single_range("*italic*", "italic", &[TextFormattingStyle::Italic]);
    }

    #[test]
    fn strikethrough() {
        assert_single_range(
            "~~struck~~",
            "struck",
            &[TextFormattingStyle::Strikethrough],
        );
    }

    #[test]
    fn bold_italic_stars() {
        assert_single_range(
            "***both***",
            "both",
            &[TextFormattingStyle::Bold, TextFormattingStyle::Italic],
        );
    }

    #[test]
    fn bold_underscore() {
        assert_single_range("__bold__", "bold", &[TextFormattingStyle::Bold]);
    }

    #[test]
    fn italic_underscore() {
        assert_single_range("_italic_", "italic", &[TextFormattingStyle::Italic]);
    }

    #[test]
    fn bold_italic_underscore() {
        assert_single_range(
            "___both___",
            "both",
            &[TextFormattingStyle::Bold, TextFormattingStyle::Italic],
        );
    }

    #[test]
    fn mid_word_underscores_preserved() {
        assert_untouched("some_var_name");
    }

    #[test]
    fn urls_protected() {
        assert_untouched("https://en.wikipedia.org/wiki/Hong_Kong_Island");
    }

    #[test]
    fn code_spans_protected() {
        assert_untouched("`*not italic*`");
    }

    #[test]
    fn backslash_escapes() {
        let parsed = parse_markdown_formatting("\\*literal\\*").unwrap();
        assert_eq!(parsed.clean_text, "*literal*");
        assert!(parsed.formatting.is_empty());
    }

    #[test]
    fn mixed_formatting_correct_offsets() {
        let parsed = parse_markdown_formatting("**bold** and *italic*").unwrap();
        assert_eq!(parsed.clean_text, "bold and italic");
        assert_eq!(
            parsed.formatting,
            vec![
                range(0, 4, &[TextFormattingStyle::Bold]),
                range(9, 6, &[TextFormattingStyle::Italic]),
            ]
        );
    }

    #[test]
    fn fenced_code_blocks_protected() {
        assert_untouched("```\n**not bold**\n```");
    }

    #[test]
    fn nested_bold_italic() {
        let parsed = parse_markdown_formatting("**_bold italic_**").unwrap();
        assert_eq!(parsed.clean_text, "bold italic");
        let has_style = |wanted: &TextFormattingStyle| {
            parsed
                .formatting
                .iter()
                .any(|range| range.styles.contains(wanted))
        };
        assert!(has_style(&TextFormattingStyle::Bold));
        assert!(has_style(&TextFormattingStyle::Italic));
    }

    #[test]
    fn validate_valid_formatting() {
        let formatting = json!([{"start": 0, "length": 4, "styles": ["bold"]}]);
        assert!(validate_text_formatting(&formatting, "test").is_ok());
    }

    #[test]
    fn validate_empty_message() {
        assert_rejected(json!([{"start": 0, "length": 1, "styles": ["bold"]}]), "");
    }

    #[test]
    fn validate_range_exceeds() {
        assert_rejected(
            json!([{"start": 2, "length": 10, "styles": ["bold"]}]),
            "test",
        );
    }

    #[test]
    fn validate_invalid_style() {
        assert_rejected(
            json!([{"start": 0, "length": 1, "styles": ["comic-sans"]}]),
            "test",
        );
    }

    #[test]
    fn has_formatting_works() {
        assert!(!has_text_formatting(None));
        assert!(!has_text_formatting(Some(&json!([]))));
        assert!(has_text_formatting(Some(&json!([{"start": 0}]))));
    }
}