// citum_engine/render/markdown.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/
5
//! CommonMark (Markdown) output format.
//!
//! This renderer is designed for **Pandoc interop**: citum processes citations
//! inline (replacing `[@key]` markers with rendered text) and emits the document
//! body verbatim, so the output can be piped directly to `pandoc` or any other
//! CommonMark-aware formatter. Only citation and bibliography strings are
//! rendered in CommonMark markup; block-level document markup passes through
//! unchanged.
//!
//! # Note styles
//!
//! Note-based styles (Chicago notes, etc.) emit `[^label]` anchors in prose and
//! `[^label]: …` footnote definitions at the end of the document. These follow
//! the Pandoc/GFM footnote extension — **not** core CommonMark. Downstream
//! consumers must enable the extension:
//! `pandoc --from commonmark+footnotes` (or `--from gfm`).
22
23use super::format::OutputFormat;
24use citum_schema::template::WrapPunctuation;
25
/// Neutralise CommonMark syntax characters in raw bibliography data.
///
/// Each occurrence of `\`, `*`, `_`, `[`, `]`, `` ` ``, `<`, `>`, or `&` is
/// prefixed with a backslash so that field values (titles, names, …) render
/// literally instead of opening emphasis, strong emphasis, links, code spans,
/// autolinks, inline HTML, or entity references. Every other character is
/// copied through untouched.
///
/// Returns a newly allocated string; the input is never modified.
fn escape_commonmark_text(s: &str) -> String {
    // Characters that must be preceded by a backslash in the output.
    const ACTIVE: [char; 9] = ['\\', '*', '_', '[', ']', '`', '<', '>', '&'];

    // Reserve a little headroom so a handful of escapes do not force a regrow.
    s.chars()
        .fold(String::with_capacity(s.len() + 4), |mut escaped, ch| {
            if ACTIVE.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
46
/// Renders processed citations and bibliography entries as CommonMark markup.
///
/// This is a stateless unit type: it carries no configuration, so any two
/// values are interchangeable. `Debug` is derived so the formatter can appear
/// in diagnostics and inside other `Debug`-deriving types.
#[derive(Debug, Default, Clone)]
pub struct Markdown;
50
51impl OutputFormat for Markdown {
52    type Output = String;
53
54    fn text(&self, s: &str) -> Self::Output {
55        escape_commonmark_text(s)
56    }
57
58    fn join(&self, items: Vec<Self::Output>, delimiter: &str) -> Self::Output {
59        items.join(delimiter)
60    }
61
62    fn finish(&self, output: Self::Output) -> String {
63        output
64    }
65
66    /// Render emphasis as `*content*` (CommonMark italic).
67    fn emph(&self, content: Self::Output) -> Self::Output {
68        if content.is_empty() {
69            return content;
70        }
71        format!("*{content}*")
72    }
73
74    /// Render strong emphasis as `**content**` (CommonMark bold).
75    fn strong(&self, content: Self::Output) -> Self::Output {
76        if content.is_empty() {
77            return content;
78        }
79        format!("**{content}**")
80    }
81
82    /// Render small caps as raw inline HTML.
83    ///
84    /// CommonMark has no native small-caps syntax. Raw `<span>` HTML is passed
85    /// through by Pandoc's CommonMark reader and most other processors.
86    fn small_caps(&self, content: Self::Output) -> Self::Output {
87        if content.is_empty() {
88            return content;
89        }
90        format!("<span style=\"font-variant:small-caps\">{content}</span>")
91    }
92
93    /// Render superscript as raw inline HTML.
94    ///
95    /// CommonMark has no native superscript syntax. Raw `<sup>` HTML is passed
96    /// through by Pandoc and most processors.
97    fn superscript(&self, content: Self::Output) -> Self::Output {
98        if content.is_empty() {
99            return content;
100        }
101        format!("<sup>{content}</sup>")
102    }
103
104    fn quote(&self, content: Self::Output) -> Self::Output {
105        if content.is_empty() {
106            return content;
107        }
108        format!("\u{201C}{content}\u{201D}")
109    }
110
111    fn affix(&self, prefix: &str, content: Self::Output, suffix: &str) -> Self::Output {
112        format!("{prefix}{content}{suffix}")
113    }
114
115    fn inner_affix(&self, prefix: &str, content: Self::Output, suffix: &str) -> Self::Output {
116        format!("{prefix}{content}{suffix}")
117    }
118
119    fn wrap_punctuation(&self, wrap: &WrapPunctuation, content: Self::Output) -> Self::Output {
120        match wrap {
121            WrapPunctuation::Parentheses => format!("({content})"),
122            WrapPunctuation::Brackets => format!("[{content}]"),
123            WrapPunctuation::Quotes => format!("\u{201C}{content}\u{201D}"),
124        }
125    }
126
127    /// Render a semantic class as a plain passthrough.
128    ///
129    /// CommonMark has no attribute syntax. Content is returned unchanged so
130    /// citations remain readable plain text. Use `--format html` or `--format djot`
131    /// if machine-readable semantic spans are needed.
132    fn semantic(&self, _class: &str, content: Self::Output) -> Self::Output {
133        content
134    }
135
136    fn annotation(&self, content: Self::Output) -> Self::Output {
137        if content.is_empty() {
138            return content;
139        }
140        format!("\n\n{content}")
141    }
142
143    fn link(&self, url: &str, content: Self::Output) -> Self::Output {
144        if content.is_empty() {
145            return content;
146        }
147        format!("[{content}]({url})")
148    }
149
150    fn entry(
151        &self,
152        _id: &str,
153        content: Self::Output,
154        url: Option<&str>,
155        _metadata: &super::format::ProcEntryMetadata,
156    ) -> Self::Output {
157        if let Some(u) = url {
158            self.link(u, content)
159        } else {
160            content
161        }
162    }
163}
164
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    reason = "tests"
)]
mod tests {
    //! Unit tests for the CommonMark output format. Each test pins the exact
    //! string produced by one formatter method.

    use super::*;

    #[test]
    fn test_markdown_emph() {
        let fmt = Markdown;
        for (input, expected) in [("", ""), ("text", "*text*")] {
            assert_eq!(fmt.emph(input.to_string()), expected);
        }
    }

    #[test]
    fn test_markdown_strong() {
        let fmt = Markdown;
        for (input, expected) in [("", ""), ("text", "**text**")] {
            assert_eq!(fmt.strong(input.to_string()), expected);
        }
    }

    #[test]
    fn test_markdown_small_caps() {
        let fmt = Markdown;
        assert_eq!(fmt.small_caps(String::new()), "");
        assert_eq!(
            fmt.small_caps("Smith".to_string()),
            "<span style=\"font-variant:small-caps\">Smith</span>"
        );
    }

    #[test]
    fn test_markdown_superscript() {
        let fmt = Markdown;
        assert_eq!(fmt.superscript(String::new()), "");
        assert_eq!(fmt.superscript("2".to_string()), "<sup>2</sup>");
    }

    #[test]
    fn test_markdown_quote() {
        let fmt = Markdown;
        for (input, expected) in [("", ""), ("text", "\u{201C}text\u{201D}")] {
            assert_eq!(fmt.quote(input.to_string()), expected);
        }
    }

    #[test]
    fn test_markdown_semantic_passthrough() {
        let fmt = Markdown;
        assert_eq!(fmt.semantic("author", "Jane Doe".to_string()), "Jane Doe");
        assert_eq!(fmt.semantic("title", String::new()), "");
    }

    #[test]
    fn test_markdown_link() {
        let fmt = Markdown;
        assert_eq!(fmt.link("https://example.com", String::new()), "");
        assert_eq!(
            fmt.link("https://example.com", "Example".to_string()),
            "[Example](https://example.com)"
        );
    }

    /// Balanced parentheses are legal in a link destination and must be
    /// emitted untouched.
    #[test]
    fn test_markdown_link_balanced_parentheses() {
        let fmt = Markdown;
        assert_eq!(
            fmt.link("https://en.wikipedia.org/wiki/Foo_(bar)", "Foo".to_string()),
            "[Foo](https://en.wikipedia.org/wiki/Foo_(bar))"
        );
    }

    #[test]
    fn test_markdown_wrap_punctuation() {
        let fmt = Markdown;
        for (wrap, input, expected) in [
            (WrapPunctuation::Parentheses, "text", "(text)"),
            (WrapPunctuation::Brackets, "text", "[text]"),
            (WrapPunctuation::Quotes, "text", "\u{201C}text\u{201D}"),
        ] {
            assert_eq!(fmt.wrap_punctuation(&wrap, input.to_string()), expected);
        }
    }

    #[test]
    fn test_markdown_join() {
        let fmt = Markdown;
        assert_eq!(fmt.join(Vec::new(), ", "), "");
        assert_eq!(fmt.join(vec!["a".to_string()], ", "), "a");
        assert_eq!(
            fmt.join(vec!["a".to_string(), "b".to_string()], "; "),
            "a; b"
        );
    }

    #[test]
    fn test_markdown_finish_is_identity() {
        let fmt = Markdown;
        assert_eq!(fmt.finish(String::new()), "");
        assert_eq!(fmt.finish("*done*".to_string()), "*done*");
    }

    #[test]
    fn test_markdown_affixes() {
        let fmt = Markdown;
        assert_eq!(fmt.affix("(", "text".to_string(), ")"), "(text)");
        assert_eq!(fmt.affix("", "text".to_string(), ""), "text");
        assert_eq!(fmt.inner_affix("<", "text".to_string(), ">"), "<text>");
        assert_eq!(fmt.inner_affix("", "text".to_string(), ""), "text");
    }

    #[test]
    fn test_markdown_annotation() {
        let fmt = Markdown;
        assert_eq!(fmt.annotation(String::new()), "");
        assert_eq!(fmt.annotation("note".to_string()), "\n\nnote");
    }

    #[test]
    fn test_markdown_text_escapes_active_chars() {
        let fmt = Markdown;
        assert_eq!(fmt.text("plain"), "plain");
        assert_eq!(fmt.text("A * B"), "A \\* B");
        assert_eq!(fmt.text("use [x]"), "use \\[x\\]");
        assert_eq!(fmt.text("code `foo`"), "code \\`foo\\`");
        assert_eq!(fmt.text("back\\slash"), "back\\\\slash");
        assert_eq!(fmt.text("under_score"), "under\\_score");
        // Angle brackets and ampersand: escape to prevent autolinks,
        // inline HTML, and HTML entity expansion.
        assert_eq!(fmt.text("<doi:10.1/x>"), "\\<doi:10.1/x\\>");
        assert_eq!(fmt.text("Smith & Jones"), "Smith \\& Jones");
        assert_eq!(fmt.text("<em>bold</em>"), "\\<em\\>bold\\</em\\>");
    }
}