Skip to main content

citum_engine/processor/document/djot/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Djot document parsing and HTML conversion adapter.
7
8pub(crate) mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13    FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14    scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// A parser for Djot citations using winnow.
///
/// Syntax: `[@key]`, `[+@key]`, or `[-@key]`. Multi-cites: `[@key1; @key2]`.
///
/// The parser is a stateless unit struct, so it is trivially copyable and
/// can be formatted for diagnostics.
#[derive(Debug, Clone, Copy, Default)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24    fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25        // Try to parse frontmatter and get remaining content
26        let (frontmatter_result, remaining_content) = parse_frontmatter(content);
27        let body_start = content.len() - remaining_content.len();
28        let (frontmatter, frontmatter_error) = match frontmatter_result {
29            Ok(fm) => (fm, None),
30            Err(e) => (None, Some(e)),
31        };
32
33        let (manual_note_references, manual_note_labels, footnote_definitions) =
34            scan_manual_notes(remaining_content);
35
36        let mut manual_note_order = Vec::new();
37        let mut seen_manual = HashSet::new();
38        for note in &manual_note_references {
39            if seen_manual.insert(note.label.clone()) {
40                manual_note_order.push(note.label.clone());
41            }
42        }
43
44        let mut citations: Vec<_> = find_citations(remaining_content, locale)
45            .into_iter()
46            .map(|(start, end, citation)| super::ParsedCitation {
47                start,
48                end,
49                citation,
50                placement: citation_placement(start, end, &footnote_definitions),
51                structure: Default::default(),
52            })
53            .collect();
54        annotate_citation_structures(remaining_content, &mut citations);
55
56        // Scan for inline bibliography blocks in remaining content
57        let bibliography_blocks = scan_bibliography_blocks(remaining_content);
58
59        let frontmatter_groups = frontmatter.as_ref().and_then(|fm| fm.bibliography.clone());
60        let frontmatter_options = frontmatter.as_ref().and_then(|fm| fm.options.clone());
61        // Legacy top-level fields are superseded by their `options.*` counterparts.
62        let frontmatter_integral_name_memory = frontmatter
63            .as_ref()
64            .and_then(|fm| fm.integral_name_memory.clone())
65            .filter(|_| {
66                frontmatter_options
67                    .as_ref()
68                    .and_then(|o| o.integral_name_memory.as_ref())
69                    .is_none()
70            });
71        let frontmatter_org_abbreviation_memory = frontmatter
72            .and_then(|fm| fm.org_abbreviation_memory)
73            .filter(|_| {
74                frontmatter_options
75                    .as_ref()
76                    .and_then(|o| o.org_abbreviation_memory.as_ref())
77                    .is_none()
78            });
79        ParsedDocument {
80            citations,
81            manual_note_order,
82            manual_note_references,
83            manual_note_labels,
84            bibliography_blocks,
85            frontmatter_groups,
86            frontmatter_integral_name_memory,
87            frontmatter_org_abbreviation_memory,
88            frontmatter_options,
89            frontmatter_error,
90            body_start,
91        }
92    }
93
94    /// Convert Djot markup to HTML using jotdown after citation splicing.
95    fn finalize_html_output(&self, rendered: &str) -> String {
96        djot_to_html(rendered)
97    }
98
99    /// Convert Djot markup to the target terminal format (Typst, LaTeX) after
100    /// citation placeholder tokens have been spliced in.
101    fn render_body_markup<F>(&self, body: &str, fmt: &F) -> String
102    where
103        F: crate::render::format::OutputFormat<Output = String>,
104    {
105        crate::render::markup::render_djot_body(body, fmt)
106    }
107}
108
109/// Determine the citation placement within the document.
110fn citation_placement(
111    start: usize,
112    end: usize,
113    footnote_definitions: &[FootnoteDefinitionRange],
114) -> CitationPlacement {
115    footnote_definitions
116        .iter()
117        .find(|definition| definition.content.start <= start && end <= definition.content.end)
118        .map_or(CitationPlacement::InlineProse, |definition| {
119            CitationPlacement::ManualFootnote {
120                label: definition.label.clone(),
121            }
122        })
123}
124
125/// Convert Djot markup to HTML using jotdown.
126#[must_use]
127pub fn djot_to_html(djot: &str) -> String {
128    let events = jotdown::Parser::new(djot);
129    jotdown::html::render_to_string(events)
130}
131
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// Two keys in one bracket form a single citation with two items; a
    /// trailing `ch. 2` becomes a chapter locator on the second item.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let found =
            DjotParser.parse_citations("[@kuhn1962; @watson1953, ch. 2]", &Locale::en_us());
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.items.len(), 2);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert_eq!(cite.items[1].id, "watson1953");

        let expected = CitationLocator::single(LocatorType::Chapter, "2");
        assert_eq!(cite.items[1].locator, Some(expected));
    }

    /// A `label: value` locator is parsed into the matching locator type.
    #[test]
    fn test_parse_structured_locator() {
        let found = DjotParser.parse_citations("[@kuhn1962, section: 5]", &Locale::en_us());
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        let expected = CitationLocator::single(LocatorType::Section, "5");
        assert_eq!(cite.items[0].locator, Some(expected));
    }

    /// Several `label: value` pairs produce a compound locator whose segments
    /// keep their source order.
    #[test]
    fn test_parse_compound_locator() {
        let found =
            DjotParser.parse_citations("[@kuhn1962, chapter: 2, page: 10]", &Locale::en_us());

        let (_, _, cite) = &found[0];
        let locator = cite.items[0].locator.as_ref().unwrap();
        assert!(locator.is_compound());

        let segments = locator.segments();
        assert_eq!(segments[0].label, LocatorType::Chapter);
        assert_eq!(segments[1].label, LocatorType::Page);
    }

    /// The `-@key` form sets the suppress-author flag.
    #[test]
    fn test_parse_suppress_author() {
        let found = DjotParser.parse_citations("[-@kuhn1962]", &Locale::en_us());
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(cite.suppress_author);
    }

    /// The `+@key` form yields an integral citation without author suppression.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let found = DjotParser.parse_citations("[+@kuhn1962]", &Locale::en_us());
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.mode, CitationMode::Integral);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(!cite.suppress_author);
    }

    /// A bracketed span containing `;` but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let found = DjotParser.parse_citations("[foo; bar]", &Locale::en_us());
        assert!(found.is_empty());
    }

    /// A citation inside a footnote definition is attributed to that footnote.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let source = "Text[^m1].\n\n[^m1]: See [@kuhn1962].";
        let document = DjotParser.parse_document(source, &Locale::en_us());

        assert_eq!(document.manual_note_order, vec![String::from("m1")]);
        assert_eq!(document.manual_note_references.len(), 1);
        assert_eq!(document.citations.len(), 1);

        let expected = CitationPlacement::ManualFootnote {
            label: String::from("m1"),
        };
        assert_eq!(document.citations[0].placement, expected);
    }

    /// A citation in running text is classified as inline prose.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let document = DjotParser.parse_document("Text [@kuhn1962].", &Locale::en_us());

        assert_eq!(document.citations.len(), 1);
        assert_eq!(
            document.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        // `finalize_html_output` is overridden for DjotParser so that the
        // spliced Djot markup is run through jotdown and comes back as HTML.
        // That is specific to this adapter; other parsers (e.g.
        // MarkdownParser) hand the markup back untouched.
        let result = DjotParser.finalize_html_output("{_em_}");
        assert!(
            result.contains("<em>em</em>"),
            "unexpected output: {result}"
        );
    }
}