1use std::ops::Range;
18
/// Compact set of inline style flags, stored as individual bits of one byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct InlineAttrs(u8);

impl InlineAttrs {
    /// The set with no flags enabled.
    pub const EMPTY: Self = InlineAttrs(0);
    /// Bold flag (bit 0).
    pub const BOLD: Self = InlineAttrs(0b01);
    /// Italic flag (bit 1).
    pub const ITALIC: Self = InlineAttrs(0b10);

    /// Returns the set with no flags enabled (the same value as `EMPTY`).
    pub fn empty() -> Self {
        InlineAttrs(0)
    }

    /// Reports whether the bold bit is set.
    pub fn is_bold(self) -> bool {
        self.contains(Self::BOLD)
    }

    /// Reports whether the italic bit is set.
    pub fn is_italic(self) -> bool {
        self.contains(Self::ITALIC)
    }

    /// Reports whether every flag set in `other` is also set in `self`.
    ///
    /// An empty `other` is contained in every set.
    pub fn contains(self, other: Self) -> bool {
        // No bit of `other` may fall outside the bits of `self`.
        (other.0 & !self.0) == 0
    }
}

impl std::ops::BitOr for InlineAttrs {
    type Output = Self;

    /// Union of both flag sets.
    fn bitor(self, rhs: Self) -> Self {
        let InlineAttrs(left) = self;
        let InlineAttrs(right) = rhs;
        InlineAttrs(left | right)
    }
}

impl std::ops::BitOrAssign for InlineAttrs {
    /// Adds every flag of `rhs` to `self` in place.
    fn bitor_assign(&mut self, rhs: Self) {
        *self = *self | rhs;
    }
}
55
56#[derive(Debug, Clone)]
58pub struct InlineSpan {
59 pub text: String,
61 pub attrs: InlineAttrs,
63 pub link_url: Option<String>,
65 pub byte_range: Range<usize>,
67}
68
69#[derive(Debug, Clone)]
72pub struct InlineMarkup {
73 pub source: String,
74 pub spans: Vec<InlineSpan>,
75}
76
77impl InlineMarkup {
78 pub fn parse(source: &str) -> Self {
80 let spans = parse_spans(source, 0);
81 Self {
82 source: source.to_string(),
83 spans,
84 }
85 }
86
87 pub fn plain(text: impl Into<String>) -> Self {
89 let s: String = text.into();
90 let spans = if s.is_empty() {
91 Vec::new()
92 } else {
93 let len = s.len();
94 vec![InlineSpan {
95 text: s.clone(),
96 attrs: InlineAttrs::EMPTY,
97 link_url: None,
98 byte_range: 0..len,
99 }]
100 };
101 Self { source: s, spans }
102 }
103
104 pub fn flatten_plain(&self) -> String {
106 self.spans.iter().map(|s| s.text.as_str()).collect()
107 }
108
109 pub fn is_empty(&self) -> bool {
110 self.spans.is_empty()
111 }
112}
113
114fn parse_spans(source: &str, base_offset: usize) -> Vec<InlineSpan> {
117 let bytes = source.as_bytes();
118 let mut out: Vec<InlineSpan> = Vec::new();
119 let mut i: usize = 0;
120 let mut text_start: usize = 0;
121 let mut text_buf = String::new();
122
123 let flush_text =
124 |out: &mut Vec<InlineSpan>, text_buf: &mut String, text_start: usize, end: usize| {
125 if !text_buf.is_empty() {
126 out.push(InlineSpan {
127 text: std::mem::take(text_buf),
128 attrs: InlineAttrs::EMPTY,
129 link_url: None,
130 byte_range: (base_offset + text_start)..(base_offset + end),
131 });
132 }
133 };
134
135 while i < bytes.len() {
136 let b = bytes[i];
137
138 if b == b'\\' && i + 1 < bytes.len() {
140 let next = bytes[i + 1];
141 if matches!(next, b'[' | b']' | b'(' | b')' | b'*' | b'\\') {
142 if text_buf.is_empty() {
143 text_start = i;
144 }
145 text_buf.push(next as char);
146 i += 2;
147 continue;
148 }
149 }
150
151 if b == b'*'
153 && i + 1 < bytes.len()
154 && bytes[i + 1] == b'*'
155 && let Some(close) = find_marker(source, i + 2, "**")
156 && close > i + 2
157 {
158 flush_text(&mut out, &mut text_buf, text_start, i);
159 let inner = &source[i + 2..close];
160 let mut inner_spans = parse_spans(inner, base_offset + i + 2);
161 for sp in inner_spans.iter_mut() {
162 sp.attrs |= InlineAttrs::BOLD;
163 }
164 out.extend(inner_spans);
165 i = close + 2;
166 text_start = i;
167 continue;
168 }
169
170 if b == b'*'
172 && let Some(close) = find_marker(source, i + 1, "*")
173 && close > i + 1
174 {
175 let close_is_double = close + 1 < bytes.len() && bytes[close + 1] == b'*';
177 if !close_is_double {
178 flush_text(&mut out, &mut text_buf, text_start, i);
179 let inner = &source[i + 1..close];
180 let mut inner_spans = parse_spans(inner, base_offset + i + 1);
181 for sp in inner_spans.iter_mut() {
182 sp.attrs |= InlineAttrs::ITALIC;
183 }
184 out.extend(inner_spans);
185 i = close + 1;
186 text_start = i;
187 continue;
188 }
189 }
190
191 if b == b'['
193 && let Some(close_label) = find_bracket_close(source, i + 1)
194 && close_label + 1 < bytes.len()
195 && bytes[close_label + 1] == b'('
196 && let Some(close_paren) = find_paren_close(source, close_label + 2)
197 {
198 flush_text(&mut out, &mut text_buf, text_start, i);
199 let label = source[i + 1..close_label].to_string();
200 let url = source[close_label + 2..close_paren].to_string();
201 out.push(InlineSpan {
202 text: label,
203 attrs: InlineAttrs::EMPTY,
204 link_url: Some(url),
205 byte_range: (base_offset + i)..(base_offset + close_paren + 1),
206 });
207 i = close_paren + 1;
208 text_start = i;
209 continue;
210 }
211
212 if text_buf.is_empty() {
215 text_start = i;
216 }
217 let ch_len = utf8_char_len(b);
218 let ch_end = (i + ch_len).min(bytes.len());
219 text_buf.push_str(&source[i..ch_end]);
220 i = ch_end;
221 }
222
223 flush_text(&mut out, &mut text_buf, text_start, bytes.len());
224 out
225}
226
/// Length in bytes of the UTF-8 sequence introduced by the lead byte `first`.
///
/// ASCII bytes, and any byte that is not a recognised multi-byte lead byte
/// (continuation bytes, `0xC0`, `0xC1`, `0xF5..=0xFF`), report a length of 1.
fn utf8_char_len(first: u8) -> usize {
    if (0xC2..=0xDF).contains(&first) {
        2
    } else if (0xE0..=0xEF).contains(&first) {
        3
    } else if (0xF0..=0xF4).contains(&first) {
        4
    } else {
        1
    }
}
236
/// Finds the first unescaped occurrence of `marker` in `source` at or after
/// byte index `from`, returning the byte index where it starts.
///
/// A backslash that has a byte after it hides both itself and that byte from
/// the search. Returns `None` when `marker` is empty or no occurrence fits
/// in the remaining input.
fn find_marker(source: &str, from: usize, marker: &str) -> Option<usize> {
    let haystack = source.as_bytes();
    let needle = marker.as_bytes();
    if needle.is_empty() {
        return None;
    }

    let mut cursor = from;
    // `get` yields `None` as soon as a full marker no longer fits.
    while let Some(window) = haystack.get(cursor..cursor + needle.len()) {
        if window[0] == b'\\' && cursor + 1 < haystack.len() {
            // Skip the backslash together with the byte it escapes.
            cursor += 2;
        } else if window == needle {
            return Some(cursor);
        } else {
            cursor += 1;
        }
    }
    None
}
259
/// Finds the first unescaped `]` in `source` at or after byte index `from`.
///
/// A backslash that has a byte after it hides both itself and that byte from
/// the search. Returns the byte index of the `]`, or `None` if there is none.
fn find_bracket_close(source: &str, from: usize) -> Option<usize> {
    let bytes = source.as_bytes();
    let mut pos = from;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            // Escape pair: step over the backslash and the escaped byte.
            b'\\' if pos + 1 < bytes.len() => pos += 2,
            b']' => return Some(pos),
            _ => pos += 1,
        }
    }
    None
}
275
/// Finds the first unescaped `)` in `source` at or after byte index `from`.
///
/// A backslash hides the byte that follows it from the search. Returns the
/// byte index of the `)`, or `None` if there is none.
fn find_paren_close(source: &str, from: usize) -> Option<usize> {
    // Set right after a backslash so the next byte is passed over.
    let mut skip_next = false;
    for (idx, &byte) in source.as_bytes().iter().enumerate().skip(from) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b')' => return Some(idx),
            _ => {}
        }
    }
    None
}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// Text of every span, in order.
    fn texts(markup: &InlineMarkup) -> Vec<&str> {
        markup.spans.iter().map(|span| span.text.as_str()).collect()
    }

    /// `(text, url)` of every span, in order.
    fn text_url_pairs(markup: &InlineMarkup) -> Vec<(&str, Option<&str>)> {
        markup
            .spans
            .iter()
            .map(|span| (span.text.as_str(), span.link_url.as_deref()))
            .collect()
    }

    #[test]
    fn plain_text_produces_single_span() {
        let markup = InlineMarkup::parse("hello world");
        assert_eq!(texts(&markup), ["hello world"]);
        let only = &markup.spans[0];
        assert_eq!(only.link_url, None);
        assert_eq!(only.attrs, InlineAttrs::EMPTY);
        assert_eq!(only.byte_range, 0..11);
    }

    #[test]
    fn empty_input_produces_no_spans() {
        assert_eq!(InlineMarkup::parse("").spans.len(), 0);
    }

    #[test]
    fn link_between_text() {
        let markup = InlineMarkup::parse("see [docs](https://x) now");
        assert_eq!(texts(&markup), ["see ", "docs", " now"]);
        assert_eq!(markup.spans[1].link_url.as_deref(), Some("https://x"));
    }

    #[test]
    fn two_adjacent_links() {
        let markup = InlineMarkup::parse("[a](b)[c](d)");
        assert_eq!(
            text_url_pairs(&markup),
            [("a", Some("b")), ("c", Some("d"))]
        );
    }

    #[test]
    fn unclosed_bracket_is_literal() {
        let markup = InlineMarkup::parse("unclosed [bracket text");
        assert_eq!(texts(&markup), ["unclosed [bracket text"]);
    }

    #[test]
    fn escaped_brackets_are_literal() {
        let markup = InlineMarkup::parse(r"\[not a link\]");
        assert_eq!(texts(&markup), ["[not a link]"]);
    }

    #[test]
    fn empty_label_link_still_parses() {
        let markup = InlineMarkup::parse("[](url)");
        assert_eq!(text_url_pairs(&markup), [("", Some("url"))]);
    }

    #[test]
    fn empty_url_link_still_parses() {
        let markup = InlineMarkup::parse("[label]()");
        assert_eq!(text_url_pairs(&markup), [("label", Some(""))]);
    }

    #[test]
    fn bold_wraps_inner_text() {
        let markup = InlineMarkup::parse("a **b** c");
        assert_eq!(texts(&markup), ["a ", "b", " c"]);
        let before = markup.spans[0].attrs;
        let middle = markup.spans[1].attrs;
        assert!(!before.is_bold());
        assert!(middle.is_bold());
        assert!(!middle.is_italic());
    }

    #[test]
    fn italic_wraps_inner_text() {
        let markup = InlineMarkup::parse("a *b* c");
        assert_eq!(markup.spans.len(), 3);
        let middle = markup.spans[1].attrs;
        assert!(middle.is_italic());
        assert!(!middle.is_bold());
    }

    #[test]
    fn bold_italic_nesting() {
        let markup = InlineMarkup::parse("**bold *italic* bold**");
        // Every span sits inside the bold run; at least one is also italic.
        assert!(markup.spans.iter().all(|span| span.attrs.is_bold()));
        assert!(markup.spans.iter().any(|span| span.attrs.is_italic()));
    }

    #[test]
    fn link_inside_bold() {
        let markup = InlineMarkup::parse("**see [docs](url)**");
        assert!(markup.spans.iter().all(|span| span.attrs.is_bold()));
        assert!(markup.spans.iter().any(|span| span.link_url.is_some()));
    }

    #[test]
    fn unclosed_bold_is_literal() {
        let markup = InlineMarkup::parse("**unclosed");
        assert_eq!(texts(&markup), ["**unclosed"]);
    }

    #[test]
    fn tooltip_key_url_passes_through_verbatim() {
        let markup = InlineMarkup::parse("click [here](:my-key) to learn more");
        let link = markup
            .spans
            .iter()
            .find(|span| span.link_url.is_some())
            .unwrap();
        assert_eq!(link.text, "here");
        assert_eq!(link.link_url.as_deref(), Some(":my-key"));
    }

    #[test]
    fn flatten_plain_concatenates_text() {
        let markup = InlineMarkup::parse("a **b** [c](d) e");
        assert_eq!(markup.flatten_plain(), "a b c e");
    }

    #[test]
    fn utf8_multibyte_characters_preserved() {
        let markup = InlineMarkup::parse("café ☕ résumé");
        assert_eq!(texts(&markup), ["café ☕ résumé"]);
    }

    #[test]
    fn byte_ranges_are_absolute_into_source() {
        let markup = InlineMarkup::parse("a [b](c) d");
        // Text before the link, the link from `[` through `)`, text after.
        assert_eq!(markup.spans[0].byte_range, 0..2);
        assert_eq!(markup.spans[1].byte_range, 2..8);
        assert_eq!(markup.spans[2].byte_range, 8..10);
    }
}