// text_typeset/layout/inline_markup.rs

//! Minimal inline-markup parser for label text.
//!
//! Supports:
//! - `[label](url)` — inline link
//! - `*italic*`     — italic run
//! - `**bold**`     — bold run
//!
//! Escapes: `\[`, `\]`, `\(`, `\)`, `\*`, `\\`. Unclosed markers fall back
//! to literal text — the parser never throws input away. Nesting works in
//! the obvious cases (`**bold *italic* bold**`, `**see [docs](url)**`).
//!
//! This module is independent of text-document: it only produces a small
//! `InlineMarkup` representation that `Typesetter::layout_single_line_markup`
//! and `Typesetter::layout_paragraph_markup` consume for tooltip / rich
//! label content.
16
17use std::ops::Range;
18
/// Bit set of per-span style flags (bold, italic).
///
/// Whether a span is a link is tracked separately, on
/// [`InlineSpan::link_url`], and is not part of this set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct InlineAttrs(u8);

impl InlineAttrs {
    /// No flags set.
    pub const EMPTY: Self = Self(0);
    /// Bold flag (bit 0).
    pub const BOLD: Self = Self(0b01);
    /// Italic flag (bit 1).
    pub const ITALIC: Self = Self(0b10);

    /// Returns the empty flag set (same value as [`Self::EMPTY`]).
    pub fn empty() -> Self {
        Self(0)
    }

    /// `true` when the bold flag is set.
    pub fn is_bold(self) -> bool {
        self.contains(Self::BOLD)
    }

    /// `true` when the italic flag is set.
    pub fn is_italic(self) -> bool {
        self.contains(Self::ITALIC)
    }

    /// `true` when every flag set in `other` is also set in `self`.
    ///
    /// The empty set is contained in every set.
    pub fn contains(self, other: Self) -> bool {
        // No bit of `other` may fall outside `self`.
        other.0 & !self.0 == 0
    }
}
42
43impl std::ops::BitOr for InlineAttrs {
44    type Output = Self;
45    fn bitor(self, rhs: Self) -> Self {
46        Self(self.0 | rhs.0)
47    }
48}
49
50impl std::ops::BitOrAssign for InlineAttrs {
51    fn bitor_assign(&mut self, rhs: Self) {
52        self.0 |= rhs.0;
53    }
54}
55
56/// One parsed span of a minimally-marked-up string.
57#[derive(Debug, Clone)]
58pub struct InlineSpan {
59    /// Visible text of this span (link label for links).
60    pub text: String,
61    /// Bold / italic attributes.
62    pub attrs: InlineAttrs,
63    /// `Some(url)` if this span is a link.
64    pub link_url: Option<String>,
65    /// Byte range into the original source string.
66    pub byte_range: Range<usize>,
67}
68
69/// Parsed input ready for shaping. Preserves the original source for
70/// diagnostics and round-tripping.
71#[derive(Debug, Clone)]
72pub struct InlineMarkup {
73    pub source: String,
74    pub spans: Vec<InlineSpan>,
75}
76
77impl InlineMarkup {
78    /// Parse a minimal markdown subset.
79    pub fn parse(source: &str) -> Self {
80        let spans = parse_spans(source, 0);
81        Self {
82            source: source.to_string(),
83            spans,
84        }
85    }
86
87    /// Shortcut for plain text (no markup) — a single span, no attrs.
88    pub fn plain(text: impl Into<String>) -> Self {
89        let s: String = text.into();
90        let spans = if s.is_empty() {
91            Vec::new()
92        } else {
93            let len = s.len();
94            vec![InlineSpan {
95                text: s.clone(),
96                attrs: InlineAttrs::EMPTY,
97                link_url: None,
98                byte_range: 0..len,
99            }]
100        };
101        Self { source: s, spans }
102    }
103
104    /// Flatten the markup to its visible text only (no attributes, no urls).
105    pub fn flatten_plain(&self) -> String {
106        self.spans.iter().map(|s| s.text.as_str()).collect()
107    }
108
109    pub fn is_empty(&self) -> bool {
110        self.spans.is_empty()
111    }
112}
113
// --- parser ------------------------------------------------------------
115
116fn parse_spans(source: &str, base_offset: usize) -> Vec<InlineSpan> {
117    let bytes = source.as_bytes();
118    let mut out: Vec<InlineSpan> = Vec::new();
119    let mut i: usize = 0;
120    let mut text_start: usize = 0;
121    let mut text_buf = String::new();
122
123    let flush_text =
124        |out: &mut Vec<InlineSpan>, text_buf: &mut String, text_start: usize, end: usize| {
125            if !text_buf.is_empty() {
126                out.push(InlineSpan {
127                    text: std::mem::take(text_buf),
128                    attrs: InlineAttrs::EMPTY,
129                    link_url: None,
130                    byte_range: (base_offset + text_start)..(base_offset + end),
131                });
132            }
133        };
134
135    while i < bytes.len() {
136        let b = bytes[i];
137
138        // Escape: \X → literal X
139        if b == b'\\' && i + 1 < bytes.len() {
140            let next = bytes[i + 1];
141            if matches!(next, b'[' | b']' | b'(' | b')' | b'*' | b'\\') {
142                if text_buf.is_empty() {
143                    text_start = i;
144                }
145                text_buf.push(next as char);
146                i += 2;
147                continue;
148            }
149        }
150
151        // Bold **…**
152        if b == b'*'
153            && i + 1 < bytes.len()
154            && bytes[i + 1] == b'*'
155            && let Some(close) = find_marker(source, i + 2, "**")
156            && close > i + 2
157        {
158            flush_text(&mut out, &mut text_buf, text_start, i);
159            let inner = &source[i + 2..close];
160            let mut inner_spans = parse_spans(inner, base_offset + i + 2);
161            for sp in inner_spans.iter_mut() {
162                sp.attrs |= InlineAttrs::BOLD;
163            }
164            out.extend(inner_spans);
165            i = close + 2;
166            text_start = i;
167            continue;
168        }
169
170        // Italic *…*
171        if b == b'*'
172            && let Some(close) = find_marker(source, i + 1, "*")
173            && close > i + 1
174        {
175            // Don't consume a `*` that's actually the start of a `**`.
176            let close_is_double = close + 1 < bytes.len() && bytes[close + 1] == b'*';
177            if !close_is_double {
178                flush_text(&mut out, &mut text_buf, text_start, i);
179                let inner = &source[i + 1..close];
180                let mut inner_spans = parse_spans(inner, base_offset + i + 1);
181                for sp in inner_spans.iter_mut() {
182                    sp.attrs |= InlineAttrs::ITALIC;
183                }
184                out.extend(inner_spans);
185                i = close + 1;
186                text_start = i;
187                continue;
188            }
189        }
190
191        // Link [label](url)
192        if b == b'['
193            && let Some(close_label) = find_bracket_close(source, i + 1)
194            && close_label + 1 < bytes.len()
195            && bytes[close_label + 1] == b'('
196            && let Some(close_paren) = find_paren_close(source, close_label + 2)
197        {
198            flush_text(&mut out, &mut text_buf, text_start, i);
199            let label = source[i + 1..close_label].to_string();
200            let url = source[close_label + 2..close_paren].to_string();
201            out.push(InlineSpan {
202                text: label,
203                attrs: InlineAttrs::EMPTY,
204                link_url: Some(url),
205                byte_range: (base_offset + i)..(base_offset + close_paren + 1),
206            });
207            i = close_paren + 1;
208            text_start = i;
209            continue;
210        }
211
212        // Literal char. Advance by UTF-8 scalar length so we never split a
213        // multi-byte sequence.
214        if text_buf.is_empty() {
215            text_start = i;
216        }
217        let ch_len = utf8_char_len(b);
218        let ch_end = (i + ch_len).min(bytes.len());
219        text_buf.push_str(&source[i..ch_end]);
220        i = ch_end;
221    }
222
223    flush_text(&mut out, &mut text_buf, text_start, bytes.len());
224    out
225}
226
/// Length in bytes of the UTF-8 sequence introduced by lead byte `first`.
///
/// ASCII bytes, continuation bytes and bytes that can never start a valid
/// sequence all report 1, so a caller stepping by this amount always makes
/// progress.
fn utf8_char_len(first: u8) -> usize {
    if (0xC2..=0xDF).contains(&first) {
        2
    } else if (0xE0..=0xEF).contains(&first) {
        3
    } else if (0xF0..=0xF4).contains(&first) {
        4
    } else {
        1
    }
}
236
/// Locate the first unescaped occurrence of `marker` in `source` at or after
/// byte offset `from`.
///
/// A backslash that is not the last byte hides itself and the byte after it
/// from the search. Returns the byte index where the marker starts, or
/// `None` when there is no match or `marker` is empty.
fn find_marker(source: &str, from: usize, marker: &str) -> Option<usize> {
    let haystack = source.as_bytes();
    let needle = marker.as_bytes();
    if needle.is_empty() {
        return None;
    }

    let mut pos = from;
    // `get` fails as soon as fewer than `needle.len()` bytes remain, which
    // ends the scan.
    while let Some(window) = haystack.get(pos..pos + needle.len()) {
        let escapes_next = window[0] == b'\\' && pos + 1 < haystack.len();
        if escapes_next {
            pos += 2;
        } else if window == needle {
            return Some(pos);
        } else {
            pos += 1;
        }
    }
    None
}
259
/// Byte index of the first unescaped `]` in `source` at or after `from`,
/// or `None` if there is none.
///
/// A backslash that is not the last byte hides the byte that follows it.
fn find_bracket_close(source: &str, from: usize) -> Option<usize> {
    let raw = source.as_bytes();
    let mut cursor = from;
    while let Some(&byte) = raw.get(cursor) {
        match byte {
            b'\\' if cursor + 1 < raw.len() => cursor += 2,
            b']' => return Some(cursor),
            _ => cursor += 1,
        }
    }
    None
}
275
/// Byte index of the first unescaped `)` in `source` at or after `from`,
/// or `None` if there is none.
///
/// A backslash that is not the last byte hides the byte that follows it.
fn find_paren_close(source: &str, from: usize) -> Option<usize> {
    let data = source.as_bytes();
    // Set when the previous byte was an escaping backslash.
    let mut skip_next = false;
    for (idx, &byte) in data.iter().enumerate().skip(from) {
        if skip_next {
            skip_next = false;
            continue;
        }
        if byte == b'\\' && idx + 1 < data.len() {
            skip_next = true;
            continue;
        }
        if byte == b')' {
            return Some(idx);
        }
    }
    None
}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` and return only the spans.
    fn spans_of(src: &str) -> Vec<InlineSpan> {
        InlineMarkup::parse(src).spans
    }

    /// Visible text of each span, in order.
    fn texts(spans: &[InlineSpan]) -> Vec<&str> {
        spans.iter().map(|span| span.text.as_str()).collect()
    }

    /// `(text, url)` of each span, in order.
    fn text_url_pairs(spans: &[InlineSpan]) -> Vec<(&str, Option<&str>)> {
        spans
            .iter()
            .map(|span| (span.text.as_str(), span.link_url.as_deref()))
            .collect()
    }

    #[test]
    fn plain_text_produces_single_span() {
        let spans = spans_of("hello world");
        assert_eq!(texts(&spans), ["hello world"]);
        let only = &spans[0];
        assert!(only.link_url.is_none());
        assert_eq!(only.attrs, InlineAttrs::EMPTY);
        assert_eq!(only.byte_range, 0..11);
    }

    #[test]
    fn empty_input_produces_no_spans() {
        assert!(spans_of("").is_empty());
    }

    #[test]
    fn link_between_text() {
        let spans = spans_of("see [docs](https://x) now");
        assert_eq!(texts(&spans), ["see ", "docs", " now"]);
        assert_eq!(spans[1].link_url.as_deref(), Some("https://x"));
    }

    #[test]
    fn two_adjacent_links() {
        let spans = spans_of("[a](b)[c](d)");
        assert_eq!(
            text_url_pairs(&spans),
            [("a", Some("b")), ("c", Some("d"))]
        );
    }

    #[test]
    fn unclosed_bracket_is_literal() {
        let spans = spans_of("unclosed [bracket text");
        assert_eq!(texts(&spans), ["unclosed [bracket text"]);
    }

    #[test]
    fn escaped_brackets_are_literal() {
        let spans = spans_of(r"\[not a link\]");
        assert_eq!(texts(&spans), ["[not a link]"]);
    }

    #[test]
    fn empty_label_link_still_parses() {
        let spans = spans_of("[](url)");
        assert_eq!(text_url_pairs(&spans), [("", Some("url"))]);
    }

    #[test]
    fn empty_url_link_still_parses() {
        let spans = spans_of("[label]()");
        assert_eq!(text_url_pairs(&spans), [("label", Some(""))]);
    }

    #[test]
    fn bold_wraps_inner_text() {
        let spans = spans_of("a **b** c");
        assert_eq!(texts(&spans), ["a ", "b", " c"]);
        assert!(!spans[0].attrs.is_bold());
        let middle = spans[1].attrs;
        assert!(middle.is_bold());
        assert!(!middle.is_italic());
    }

    #[test]
    fn italic_wraps_inner_text() {
        let spans = spans_of("a *b* c");
        assert_eq!(spans.len(), 3);
        let middle = spans[1].attrs;
        assert!(middle.is_italic());
        assert!(!middle.is_bold());
    }

    #[test]
    fn bold_italic_nesting() {
        // Inside bold: "bold ", "italic" (italic), " bold" — all bold.
        let spans = spans_of("**bold *italic* bold**");
        assert!(spans.iter().all(|span| span.attrs.is_bold()));
        assert!(spans.iter().any(|span| span.attrs.is_italic()));
    }

    #[test]
    fn link_inside_bold() {
        let spans = spans_of("**see [docs](url)**");
        assert!(spans.iter().all(|span| span.attrs.is_bold()));
        assert!(spans.iter().any(|span| span.link_url.is_some()));
    }

    #[test]
    fn unclosed_bold_is_literal() {
        let spans = spans_of("**unclosed");
        assert_eq!(texts(&spans), ["**unclosed"]);
    }

    #[test]
    fn tooltip_key_url_passes_through_verbatim() {
        // The `:key` URL scheme is recognized by the tooltip widget,
        // not the parser. Parser just stores the URL as-is.
        let spans = spans_of("click [here](:my-key) to learn more");
        let link = spans
            .iter()
            .find(|span| span.link_url.is_some())
            .unwrap();
        assert_eq!(link.text, "here");
        assert_eq!(link.link_url.as_deref(), Some(":my-key"));
    }

    #[test]
    fn flatten_plain_concatenates_text() {
        let markup = InlineMarkup::parse("a **b** [c](d) e");
        assert_eq!(markup.flatten_plain(), "a b c e");
    }

    #[test]
    fn utf8_multibyte_characters_preserved() {
        let spans = spans_of("café ☕ résumé");
        assert_eq!(texts(&spans), ["café ☕ résumé"]);
    }

    #[test]
    fn byte_ranges_are_absolute_into_source() {
        let spans = spans_of("a [b](c) d");
        // "a " = 0..2
        // "[b](c)" = 2..8
        // " d" = 8..10
        let expected = [0..2, 2..8, 8..10];
        for (idx, range) in expected.iter().enumerate() {
            assert_eq!(&spans[idx].byte_range, range);
        }
    }
}
437}