Skip to main content

citum_engine/render/
rich_text.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Djot and org-mode inline markup rendering for free-text fields.
7
8use super::format::OutputFormat;
9use jotdown::{Attributes, Container, Event, Parser};
10
/// Accumulator for one open container while walking the djot event stream.
#[derive(Default)]
struct DjotFrame {
    /// Already-rendered fragments, in document order.
    children: Vec<String>,
    /// Class names attached to the container that opened this frame.
    classes: Vec<String>,
    /// Link target when this frame was opened by a link container.
    link_url: Option<String>,
    /// Set when this frame, or a frame merged into it, was opened by a link.
    has_explicit_link: bool,
    /// Most recent logical character recorded by `push_rendered`.
    last_char: Option<char>,
    /// True when this frame (or an ancestor) carries `.nocase` protection.
    case_protected: bool,
}

impl DjotFrame {
    /// Append a rendered fragment.
    ///
    /// `logical_last_char` replaces the remembered last character only when it
    /// is `Some`; passing `None` leaves the previous value untouched.
    fn push_rendered(&mut self, rendered: String, logical_last_char: Option<char>) {
        self.last_char = logical_last_char.or(self.last_char);
        self.children.push(rendered);
    }

    /// Report whether a quote mark placed next would be in opening position.
    ///
    /// That is the case when nothing has been recorded yet, or when the last
    /// recorded character is whitespace, an opening bracket, or a quote mark
    /// that itself opens (or is a straight quote).
    fn prev_opens_quote(&self) -> bool {
        match self.last_char {
            None => true,
            Some(prev) => {
                prev.is_whitespace()
                    || matches!(
                        prev,
                        '(' | '[' | '{' | '\u{2018}' | '\u{201C}' | '\'' | '"'
                    )
            }
        }
    }
}
35
36fn span_classes(attrs: Option<&Attributes>) -> Vec<String> {
37    attrs
38        .into_iter()
39        .flat_map(|attrs| attrs.iter())
40        .filter_map(|(kind, val)| {
41            use jotdown::AttributeKind;
42            if matches!(kind, AttributeKind::Class) {
43                Some(val.to_string())
44            } else {
45                None
46            }
47        })
48        .flat_map(|classes| {
49            classes
50                .split_whitespace()
51                .map(std::string::ToString::to_string)
52                .collect::<Vec<_>>()
53        })
54        .collect()
55}
56
57/// Process a djot End container event, applying formatting and merging into parent frame.
58fn handle_end_event<F: OutputFormat<Output = String>>(
59    container: Container,
60    frame: DjotFrame,
61    parent: &mut DjotFrame,
62    fmt: &F,
63) {
64    let inner_text = frame.children.join("");
65    let formatted = match container {
66        Container::Emphasis => fmt.emph(inner_text),
67        Container::Strong => fmt.strong(inner_text),
68        Container::Link(_, _) => {
69            if let Some(url) = frame.link_url.as_deref() {
70                fmt.link(url, inner_text)
71            } else {
72                inner_text
73            }
74        }
75        Container::Span => {
76            if frame
77                .classes
78                .iter()
79                .any(|class| class == "smallcaps" || class == "small-caps")
80            {
81                fmt.small_caps(inner_text)
82            } else {
83                inner_text
84            }
85        }
86        _ => inner_text,
87    };
88    parent.push_rendered(formatted, frame.last_char);
89    parent.has_explicit_link |= frame.has_explicit_link;
90}
91
92fn render_djot_inline_internal<F, G>(src: &str, fmt: &F, mut transform_text: G) -> (String, bool)
93where
94    F: OutputFormat<Output = String>,
95    G: FnMut(&str) -> String,
96{
97    let parser = Parser::new(src);
98    let mut stack = vec![DjotFrame::default()];
99
100    for event in parser {
101        match event {
102            Event::Start(container, attrs) => {
103                let link_url = if let Container::Link(url, _) = &container {
104                    Some(url.to_string())
105                } else {
106                    None
107                };
108                let classes = span_classes(Some(&attrs));
109                let parent_protected = stack.last().is_some_and(|f| f.case_protected);
110                let is_nocase = classes.iter().any(|c| c == "nocase");
111                stack.push(DjotFrame {
112                    case_protected: parent_protected || is_nocase,
113                    has_explicit_link: link_url.is_some(),
114                    link_url,
115                    classes,
116                    ..Default::default()
117                });
118            }
119            Event::End(container) => {
120                if let (Some(frame), Some(parent)) = (stack.pop(), stack.last_mut()) {
121                    handle_end_event(container, frame, parent, fmt);
122                }
123            }
124            Event::Str(s) => {
125                if let Some(frame) = stack.last_mut() {
126                    // Always call transform_text so stateful transforms (e.g., sentence-case)
127                    // can update their internal state, even for .nocase-protected spans.
128                    let transformed = transform_text(s.as_ref());
129                    let render_text = if frame.case_protected {
130                        s.to_string()
131                    } else {
132                        transformed
133                    };
134                    frame.push_rendered(fmt.text(&render_text), render_text.chars().last());
135                }
136            }
137            Event::Symbol(sym) => {
138                if let Some(frame) = stack.last_mut() {
139                    frame.push_rendered(fmt.text(sym.as_ref()), sym.chars().last());
140                }
141            }
142            Event::LeftSingleQuote => {
143                if let Some(frame) = stack.last_mut() {
144                    frame.push_rendered(fmt.text("\u{2018}"), Some('\u{2018}'));
145                }
146            }
147            Event::RightSingleQuote => {
148                if let Some(frame) = stack.last_mut() {
149                    let quote = if frame.prev_opens_quote() {
150                        '\u{2018}'
151                    } else {
152                        '\u{2019}'
153                    };
154                    frame.push_rendered(fmt.text(&quote.to_string()), Some(quote));
155                }
156            }
157            Event::LeftDoubleQuote => {
158                if let Some(frame) = stack.last_mut() {
159                    frame.push_rendered(fmt.text("\u{201C}"), Some('\u{201C}'));
160                }
161            }
162            Event::RightDoubleQuote => {
163                if let Some(frame) = stack.last_mut() {
164                    let quote = if frame.prev_opens_quote() {
165                        '\u{201C}'
166                    } else {
167                        '\u{201D}'
168                    };
169                    frame.push_rendered(fmt.text(&quote.to_string()), Some(quote));
170                }
171            }
172            Event::Softbreak | Event::Hardbreak => {
173                if let Some(frame) = stack.last_mut() {
174                    frame.push_rendered(fmt.text(" "), Some(' '));
175                }
176            }
177            _ => {}
178        }
179    }
180
181    stack
182        .into_iter()
183        .next()
184        .map(|frame| (frame.children.join(""), frame.has_explicit_link))
185        .unwrap_or_default()
186}
187
188/// Render djot inline markup and map events to `OutputFormat` methods.
189///
190/// Parses the input as djot inline markup and transforms container and text
191/// events into formatted output. Block-level containers are collapsed to their
192/// text content. Inline containers (emphasis, strong, links, etc.) are rendered
193/// using the format's methods.
194///
195/// # Arguments
196/// * `src` - Input string with djot inline markup
197/// * `fmt` - `OutputFormat` implementation for rendering
198///
199/// # Returns
200/// Formatted string with markup applied according to the `OutputFormat`'s methods
201pub fn render_djot_inline<F: OutputFormat<Output = String>>(src: &str, fmt: &F) -> String {
202    render_djot_inline_internal(src, fmt, str::to_string).0
203}
204
205/// Render djot inline markup while transforming text leaves and returning link metadata.
206pub(crate) fn render_djot_inline_with_transform<F, G>(
207    src: &str,
208    fmt: &F,
209    transform_text: G,
210) -> (String, bool)
211where
212    F: OutputFormat<Output = String>,
213    G: FnMut(&str) -> String,
214{
215    render_djot_inline_internal(src, fmt, transform_text)
216}
217
218/// Render org-mode inline markup by walking the orgize event stream.
219///
220/// Parses `src` as org-mode and maps inline elements to `OutputFormat` methods:
221/// bold (`*text*`) → `strong`, italic (`/text/`) → `emph`, verbatim/code →
222/// `text` (stripped), links (`[[url][desc]]`) → `link`, plain text → `text`.
223/// Container elements (Bold, Italic) are collected via a stack so nested
224/// markup is handled correctly.
225pub fn render_org_inline<F: OutputFormat<Output = String>>(src: &str, fmt: &F) -> String {
226    use orgize::Event;
227    use orgize::Org;
228    use orgize::elements::Element;
229
230    let org = Org::parse(src);
231    // Stack of (tag, accumulated_children) for open containers.
232    // Tags: 0 = Bold, 1 = Italic, 2 = root paragraph accumulator.
233    let mut stack: Vec<(u8, String)> = vec![(2, String::new())];
234
235    for event in org.iter() {
236        match event {
237            Event::Start(Element::Bold) => stack.push((0, String::new())),
238            Event::Start(Element::Italic) => stack.push((1, String::new())),
239            Event::End(Element::Bold) => {
240                if let Some((0, inner)) = stack.pop() {
241                    let rendered = fmt.strong(inner);
242                    if let Some(top) = stack.last_mut() {
243                        top.1.push_str(&rendered);
244                    }
245                }
246            }
247            Event::End(Element::Italic) => {
248                if let Some((1, inner)) = stack.pop() {
249                    let rendered = fmt.emph(inner);
250                    if let Some(top) = stack.last_mut() {
251                        top.1.push_str(&rendered);
252                    }
253                }
254            }
255            Event::Start(Element::Link(link)) => {
256                let desc = link.desc.as_deref().unwrap_or(&link.path);
257                let rendered = fmt.link(&link.path, fmt.text(desc));
258                if let Some(top) = stack.last_mut() {
259                    top.1.push_str(&rendered);
260                }
261            }
262            Event::Start(Element::Text { value }) => {
263                if let Some(top) = stack.last_mut() {
264                    top.1.push_str(&fmt.text(value));
265                }
266            }
267            Event::Start(Element::Verbatim { value } | Element::Code { value }) => {
268                if let Some(top) = stack.last_mut() {
269                    top.1.push_str(&fmt.text(value));
270                }
271            }
272            _ => {}
273        }
274    }
275
276    stack.into_iter().next().map(|(_, s)| s).unwrap_or_default()
277}
278
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use crate::render::html::Html;
    use crate::render::plain::PlainText;
    use crate::render::typst::Typst;

    // --- djot ---------------------------------------------------------------

    #[test]
    fn test_djot_emphasis_plain() {
        // The plain-text format marks emphasis with underscores.
        assert_eq!(render_djot_inline("_foo_", &PlainText), "_foo_");
    }

    #[test]
    fn test_djot_strong_single_asterisk() {
        // Djot spells strong with a single `*`; the plain-text format emits `**`.
        assert_eq!(render_djot_inline("*bar*", &PlainText), "**bar**");
    }

    #[test]
    fn test_djot_unicode_math() {
        // Non-ASCII characters pass through untouched.
        assert_eq!(render_djot_inline("H₂O", &PlainText), "H₂O");
    }

    #[test]
    fn test_djot_plain_no_markup() {
        let input = "plain text with no markup";
        assert_eq!(
            render_djot_inline(input, &PlainText),
            "plain text with no markup"
        );
    }

    #[test]
    fn test_djot_combined_formatting() {
        // Strong nested inside emphasis: the outer `_..._` wraps the inner `**...**`.
        let rendered = render_djot_inline("_emphasized *bold* text_", &PlainText);
        assert_eq!(rendered, "_emphasized **bold** text_");
    }

    #[test]
    fn test_djot_link() {
        // The plain-text format keeps only the link label.
        let rendered = render_djot_inline("[click here](https://example.com)", &PlainText);
        assert_eq!(rendered, "click here");
    }

    #[test]
    fn test_djot_nested_formatting_preserves_typst_markup() {
        let rendered = render_djot_inline("_emphasized *bold* text_", &Typst);
        assert_eq!(rendered, "_emphasized *bold* text_");
    }

    #[test]
    fn test_djot_nested_link_preserves_inner_markup_html() {
        let rendered = render_djot_inline("[_linked emphasis_](https://example.com)", &Html);
        assert_eq!(
            rendered,
            r#"<a href="https://example.com"><i>linked emphasis</i></a>"#
        );
    }

    #[test]
    fn test_djot_quotes_inside_emphasis_open_correctly() {
        // A quote at the very start of an emphasis span must open, not close.
        let rendered = render_djot_inline("_\"Parmenides\" dialogue_", &PlainText);
        assert_eq!(rendered, "_“Parmenides” dialogue_");
    }

    // --- org-mode -----------------------------------------------------------

    #[test]
    fn test_org_plain_text() {
        let input = "plain text with no markup";
        assert_eq!(
            render_org_inline(input, &PlainText),
            "plain text with no markup"
        );
    }

    #[test]
    fn test_org_bold() {
        // Org bold maps to the format's strong markup.
        assert_eq!(render_org_inline("*bold*", &PlainText), "**bold**");
    }

    #[test]
    fn test_org_italic() {
        // Org italic maps to the format's emphasis markup.
        assert_eq!(render_org_inline("/italic/", &PlainText), "_italic_");
    }
}
392}