Skip to main content

citum_engine/render/
rich_text.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Djot and org-mode inline markup rendering for free-text fields.
7
8use super::format::OutputFormat;
9use jotdown::{Attributes, Container, Event, Parser};
10
/// Ambient state handed to the inline rich-text renderer by its caller.
///
/// The `Default` value has a `quote_depth` of zero.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct InlineRenderContext {
    /// Number of quotation levels already opened by an outer template wrapper.
    pub quote_depth: usize,
}
17
/// Accumulator for one open djot container (or for the document root).
#[derive(Default)]
struct DjotFrame {
    /// Rendered fragments collected so far, in source order.
    children: Vec<String>,
    /// Class names attached to the container that opened this frame.
    classes: Vec<String>,
    /// Link destination when the frame was opened by a link container.
    link_url: Option<String>,
    /// True when this frame, or a frame merged into it, was opened by a link.
    has_explicit_link: bool,
    /// Most recent logical character pushed into this frame, if any.
    last_char: Option<char>,
    /// True when this frame (or an ancestor) carries `.nocase` protection.
    case_protected: bool,
}

impl DjotFrame {
    /// Appends an already-rendered fragment to this frame.
    ///
    /// `logical_last_char` replaces the tracked last character only when it is
    /// `Some`; passing `None` leaves the previous value in place.
    fn push_rendered(&mut self, rendered: String, logical_last_char: Option<char>) {
        self.last_char = logical_last_char.or(self.last_char);
        self.children.push(rendered);
    }

    /// Reports whether a quote mark at the current position should open a
    /// quotation: nothing has been pushed yet, or the last logical character is
    /// whitespace, an opening bracket, or a quote character.
    fn prev_opens_quote(&self) -> bool {
        match self.last_char {
            None => true,
            Some(prev) => {
                prev.is_whitespace()
                    || matches!(
                        prev,
                        '(' | '[' | '{' | '\u{2018}' | '\u{201C}' | '\'' | '"'
                    )
            }
        }
    }
}
42
43fn opening_quote_depth(
44    context: InlineRenderContext,
45    current_depth: usize,
46    source_inner: bool,
47) -> usize {
48    if source_inner && current_depth <= context.quote_depth {
49        context.quote_depth + 1
50    } else {
51        current_depth
52    }
53}
54
55fn push_open_quote<F: OutputFormat<Output = String>>(frame: &mut DjotFrame, fmt: &F, depth: usize) {
56    let (open, _) = fmt.quote_marks(depth);
57    let logical_char = open.chars().next();
58    frame.push_rendered(fmt.text(open), logical_char);
59}
60
61fn push_close_quote<F: OutputFormat<Output = String>>(
62    frame: &mut DjotFrame,
63    fmt: &F,
64    depth: usize,
65) {
66    let (_, close) = fmt.quote_marks(depth);
67    let logical_char = close.chars().last();
68    frame.push_rendered(fmt.text(close), logical_char);
69}
70
71struct QuoteRenderState {
72    depth: usize,
73    stack: Vec<usize>,
74}
75
76impl QuoteRenderState {
77    fn new(context: InlineRenderContext) -> Self {
78        Self {
79            depth: context.quote_depth,
80            stack: Vec::new(),
81        }
82    }
83
84    fn render_event<F: OutputFormat<Output = String>>(
85        &mut self,
86        frame: &mut DjotFrame,
87        fmt: &F,
88        context: InlineRenderContext,
89        source_inner: bool,
90        opens_quote: bool,
91    ) {
92        if opens_quote {
93            let depth = opening_quote_depth(context, self.depth, source_inner);
94            push_open_quote(frame, fmt, depth);
95            self.stack.push(depth);
96            self.depth = depth + 1;
97        } else {
98            let fallback_depth = context.quote_depth + usize::from(source_inner);
99            let depth = self.stack.pop().unwrap_or(fallback_depth);
100            push_close_quote(frame, fmt, depth);
101            self.depth = self.stack.last().map_or(context.quote_depth, |d| d + 1);
102        }
103    }
104}
105
106fn span_classes(attrs: Option<&Attributes>) -> Vec<String> {
107    attrs
108        .into_iter()
109        .flat_map(|attrs| attrs.iter())
110        .filter_map(|(kind, val)| {
111            use jotdown::AttributeKind;
112            if matches!(kind, AttributeKind::Class) {
113                Some(val.to_string())
114            } else {
115                None
116            }
117        })
118        .flat_map(|classes| {
119            classes
120                .split_whitespace()
121                .map(std::string::ToString::to_string)
122                .collect::<Vec<_>>()
123        })
124        .collect()
125}
126
127/// Process a djot End container event, applying formatting and merging into parent frame.
128fn handle_end_event<F: OutputFormat<Output = String>>(
129    container: Container,
130    frame: DjotFrame,
131    parent: &mut DjotFrame,
132    fmt: &F,
133) {
134    let inner_text = frame.children.join("");
135    let formatted = match container {
136        Container::Emphasis => fmt.emph(inner_text),
137        Container::Strong => fmt.strong(inner_text),
138        Container::Link(_, _) => {
139            if let Some(url) = frame.link_url.as_deref() {
140                fmt.link(url, inner_text)
141            } else {
142                inner_text
143            }
144        }
145        Container::Span => {
146            if frame
147                .classes
148                .iter()
149                .any(|class| class == "smallcaps" || class == "small-caps")
150            {
151                fmt.small_caps(inner_text)
152            } else {
153                inner_text
154            }
155        }
156        _ => inner_text,
157    };
158    parent.push_rendered(formatted, frame.last_char);
159    parent.has_explicit_link |= frame.has_explicit_link;
160}
161
162fn render_djot_inline_internal<F, G>(
163    src: &str,
164    fmt: &F,
165    context: InlineRenderContext,
166    mut transform_text: G,
167) -> (String, bool)
168where
169    F: OutputFormat<Output = String>,
170    G: FnMut(&str) -> String,
171{
172    let parser = Parser::new(src);
173    let mut stack = vec![DjotFrame::default()];
174    let mut quote_state = QuoteRenderState::new(context);
175
176    for event in parser {
177        match event {
178            Event::Start(container, attrs) => {
179                let link_url = if let Container::Link(url, _) = &container {
180                    Some(url.to_string())
181                } else {
182                    None
183                };
184                let classes = span_classes(Some(&attrs));
185                let parent_protected = stack.last().is_some_and(|f| f.case_protected);
186                let is_nocase = classes.iter().any(|c| c == "nocase");
187                stack.push(DjotFrame {
188                    case_protected: parent_protected || is_nocase,
189                    has_explicit_link: link_url.is_some(),
190                    link_url,
191                    classes,
192                    ..Default::default()
193                });
194            }
195            Event::End(container) => {
196                if let (Some(frame), Some(parent)) = (stack.pop(), stack.last_mut()) {
197                    handle_end_event(container, frame, parent, fmt);
198                }
199            }
200            Event::Str(s) => {
201                if let Some(frame) = stack.last_mut() {
202                    // Always call transform_text so stateful transforms (e.g., sentence-case)
203                    // can update their internal state, even for .nocase-protected spans.
204                    let transformed = transform_text(s.as_ref());
205                    let render_text = if frame.case_protected {
206                        s.to_string()
207                    } else {
208                        transformed
209                    };
210                    frame.push_rendered(fmt.text(&render_text), render_text.chars().last());
211                }
212            }
213            Event::Symbol(sym) => {
214                if let Some(frame) = stack.last_mut() {
215                    frame.push_rendered(fmt.text(sym.as_ref()), sym.chars().last());
216                }
217            }
218            Event::LeftSingleQuote => {
219                if let Some(frame) = stack.last_mut() {
220                    quote_state.render_event(frame, fmt, context, true, true);
221                }
222            }
223            Event::RightSingleQuote => {
224                if let Some(frame) = stack.last_mut() {
225                    quote_state.render_event(frame, fmt, context, true, frame.prev_opens_quote());
226                }
227            }
228            Event::LeftDoubleQuote => {
229                if let Some(frame) = stack.last_mut() {
230                    quote_state.render_event(frame, fmt, context, false, true);
231                }
232            }
233            Event::RightDoubleQuote => {
234                if let Some(frame) = stack.last_mut() {
235                    quote_state.render_event(frame, fmt, context, false, frame.prev_opens_quote());
236                }
237            }
238            Event::Softbreak | Event::Hardbreak => {
239                if let Some(frame) = stack.last_mut() {
240                    frame.push_rendered(fmt.text(" "), Some(' '));
241                }
242            }
243            _ => {}
244        }
245    }
246
247    stack
248        .into_iter()
249        .next()
250        .map(|frame| (frame.children.join(""), frame.has_explicit_link))
251        .unwrap_or_default()
252}
253
254/// Render djot inline markup and map events to `OutputFormat` methods.
255///
256/// Parses the input as djot inline markup and transforms container and text
257/// events into formatted output. Block-level containers are collapsed to their
258/// text content. Inline containers (emphasis, strong, links, etc.) are rendered
259/// using the format's methods.
260///
261/// # Arguments
262/// * `src` - Input string with djot inline markup
263/// * `fmt` - `OutputFormat` implementation for rendering
264///
265/// # Returns
266/// Formatted string with markup applied according to the `OutputFormat`'s methods
267pub fn render_djot_inline<F: OutputFormat<Output = String>>(src: &str, fmt: &F) -> String {
268    render_djot_inline_internal(src, fmt, InlineRenderContext::default(), str::to_string).0
269}
270
271/// Render djot inline markup with an ambient inline rendering context.
272pub fn render_djot_inline_with_context<F: OutputFormat<Output = String>>(
273    src: &str,
274    fmt: &F,
275    context: InlineRenderContext,
276) -> String {
277    render_djot_inline_internal(src, fmt, context, str::to_string).0
278}
279
280/// Render djot inline markup while transforming text leaves and returning link metadata.
281pub(crate) fn render_djot_inline_with_transform<F, G>(
282    src: &str,
283    fmt: &F,
284    transform_text: G,
285) -> (String, bool)
286where
287    F: OutputFormat<Output = String>,
288    G: FnMut(&str) -> String,
289{
290    render_djot_inline_internal(src, fmt, InlineRenderContext::default(), transform_text)
291}
292
293/// Render djot inline markup with text transforms and ambient context.
294pub(crate) fn render_djot_inline_with_transform_and_context<F, G>(
295    src: &str,
296    fmt: &F,
297    context: InlineRenderContext,
298    transform_text: G,
299) -> (String, bool)
300where
301    F: OutputFormat<Output = String>,
302    G: FnMut(&str) -> String,
303{
304    render_djot_inline_internal(src, fmt, context, transform_text)
305}
306
307/// Render org-mode inline markup by walking the orgize event stream.
308///
309/// Parses `src` as org-mode and maps inline elements to `OutputFormat` methods:
310/// bold (`*text*`) → `strong`, italic (`/text/`) → `emph`, verbatim/code →
311/// `text` (stripped), links (`[[url][desc]]`) → `link`, plain text → `text`.
312/// Container elements (Bold, Italic) are collected via a stack so nested
313/// markup is handled correctly.
314pub fn render_org_inline<F: OutputFormat<Output = String>>(src: &str, fmt: &F) -> String {
315    use orgize::Event;
316    use orgize::Org;
317    use orgize::elements::Element;
318
319    let org = Org::parse(src);
320    // Stack of (tag, accumulated_children) for open containers.
321    // Tags: 0 = Bold, 1 = Italic, 2 = root paragraph accumulator.
322    let mut stack: Vec<(u8, String)> = vec![(2, String::new())];
323
324    for event in org.iter() {
325        match event {
326            Event::Start(Element::Bold) => stack.push((0, String::new())),
327            Event::Start(Element::Italic) => stack.push((1, String::new())),
328            Event::End(Element::Bold) => {
329                if let Some((0, inner)) = stack.pop() {
330                    let rendered = fmt.strong(inner);
331                    if let Some(top) = stack.last_mut() {
332                        top.1.push_str(&rendered);
333                    }
334                }
335            }
336            Event::End(Element::Italic) => {
337                if let Some((1, inner)) = stack.pop() {
338                    let rendered = fmt.emph(inner);
339                    if let Some(top) = stack.last_mut() {
340                        top.1.push_str(&rendered);
341                    }
342                }
343            }
344            Event::Start(Element::Link(link)) => {
345                let desc = link.desc.as_deref().unwrap_or(&link.path);
346                let rendered = fmt.link(&link.path, fmt.text(desc));
347                if let Some(top) = stack.last_mut() {
348                    top.1.push_str(&rendered);
349                }
350            }
351            Event::Start(Element::Text { value }) => {
352                if let Some(top) = stack.last_mut() {
353                    top.1.push_str(&fmt.text(value));
354                }
355            }
356            Event::Start(Element::Verbatim { value } | Element::Code { value }) => {
357                if let Some(top) = stack.last_mut() {
358                    top.1.push_str(&fmt.text(value));
359                }
360            }
361            _ => {}
362        }
363    }
364
365    stack.into_iter().next().map(|(_, s)| s).unwrap_or_default()
366}
367
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use crate::render::html::Html;
    use crate::render::plain::PlainText;
    use crate::render::typst::Typst;

    /// Ambient context equivalent to being nested inside one outer quotation.
    const INSIDE_ONE_QUOTE: InlineRenderContext = InlineRenderContext { quote_depth: 1 };

    #[test]
    fn test_djot_emphasis_plain() {
        // Plain-text emphasis is delimited with underscores.
        assert_eq!(render_djot_inline("_foo_", &PlainText), "_foo_");
    }

    #[test]
    fn test_djot_strong_single_asterisk() {
        // Djot marks strong text with a single `*`; plain text renders it as `**…**`.
        assert_eq!(render_djot_inline("*bar*", &PlainText), "**bar**");
    }

    #[test]
    fn test_djot_unicode_math() {
        assert_eq!(render_djot_inline("H₂O", &PlainText), "H₂O");
    }

    #[test]
    fn test_djot_plain_no_markup() {
        let input = "plain text with no markup";
        assert_eq!(render_djot_inline(input, &PlainText), "plain text with no markup");
    }

    #[test]
    fn test_djot_combined_formatting() {
        // Strong nested inside emphasis keeps both sets of delimiters.
        let rendered = render_djot_inline("_emphasized *bold* text_", &PlainText);
        assert_eq!(rendered, "_emphasized **bold** text_");
    }

    #[test]
    fn test_djot_link() {
        // The plain-text format keeps only the link label.
        let rendered = render_djot_inline("[click here](https://example.com)", &PlainText);
        assert_eq!(rendered, "click here");
    }

    #[test]
    fn test_djot_nested_formatting_preserves_typst_markup() {
        let rendered = render_djot_inline("_emphasized *bold* text_", &Typst);
        assert_eq!(rendered, "#emph[emphasized #strong[bold] text]");
    }

    #[test]
    fn test_djot_nested_link_preserves_inner_markup_html() {
        let rendered = render_djot_inline("[_linked emphasis_](https://example.com)", &Html);
        assert_eq!(
            rendered,
            r#"<a href="https://example.com"><em>linked emphasis</em></a>"#
        );
    }

    #[test]
    fn test_djot_quotes_inside_emphasis_open_correctly() {
        let rendered = render_djot_inline("_\"Parmenides\" dialogue_", &PlainText);
        assert_eq!(rendered, "_“Parmenides” dialogue_");
    }

    #[test]
    fn test_djot_quotes_with_ambient_quote_depth_use_inner_marks() {
        let rendered = render_djot_inline_with_context(
            "\"Parmenides\" dialogue",
            &PlainText,
            INSIDE_ONE_QUOTE,
        );
        assert_eq!(rendered, "‘Parmenides’ dialogue");
    }

    #[test]
    fn test_djot_nested_quotes_alternate_marks() {
        let rendered = render_djot_inline("\"outer \"inner\" claim\"", &PlainText);
        assert_eq!(rendered, "“outer ‘inner’ claim”");
    }

    #[test]
    fn test_djot_quotes_inside_emphasis_use_ambient_quote_depth() {
        let rendered = render_djot_inline_with_context(
            "_\"Parmenides\" dialogue_",
            &PlainText,
            INSIDE_ONE_QUOTE,
        );
        assert_eq!(rendered, "_‘Parmenides’ dialogue_");
    }

    #[test]
    fn test_org_plain_text() {
        let input = "plain text with no markup";
        assert_eq!(render_org_inline(input, &PlainText), "plain text with no markup");
    }

    #[test]
    fn test_org_bold() {
        // Org bold maps to the format's strong rendering (`**…**` in plain text).
        assert_eq!(render_org_inline("*bold*", &PlainText), "**bold**");
    }

    #[test]
    fn test_org_italic() {
        // Org italic maps to the format's emphasis rendering (`_…_` in plain text).
        assert_eq!(render_org_inline("/italic/", &PlainText), "_italic_");
    }
}
510}