// citum_engine/processor/document/djot/mod.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/

//! Djot document parsing and HTML conversion adapter.

8pub(crate) mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13    FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14    scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// A parser for Djot citations using winnow.
///
/// Recognised bracketed forms:
///
/// - `[@key]` — a plain citation.
/// - `[+@key]` — an integral citation.
/// - `[-@key]` — a citation with the author suppressed.
/// - `[@key1; @key2]` — several items in one citation.
///
/// An item may carry a locator after a comma, e.g. `[@key, ch. 2]`,
/// `[@key, section: 5]`, or a compound `[@key, chapter: 2, page: 10]`.
///
/// The parser is a stateless unit type, so it is cheap to copy and can be
/// printed in diagnostics.
#[derive(Debug, Clone, Copy, Default)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24    fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25        // Try to parse frontmatter and get remaining content
26        let (frontmatter_result, remaining_content) = parse_frontmatter(content);
27        let body_start = content.len() - remaining_content.len();
28        let (frontmatter, frontmatter_error) = match frontmatter_result {
29            Ok(fm) => (fm, None),
30            Err(e) => (None, Some(e)),
31        };
32
33        let (manual_note_references, manual_note_labels, footnote_definitions) =
34            scan_manual_notes(remaining_content);
35
36        let mut manual_note_order = Vec::new();
37        let mut seen_manual = HashSet::new();
38        for note in &manual_note_references {
39            if seen_manual.insert(note.label.clone()) {
40                manual_note_order.push(note.label.clone());
41            }
42        }
43
44        let mut citations: Vec<_> = find_citations(remaining_content, locale)
45            .into_iter()
46            .map(|(start, end, citation)| super::ParsedCitation {
47                start,
48                end,
49                citation,
50                placement: citation_placement(start, end, &footnote_definitions),
51                structure: Default::default(),
52            })
53            .collect();
54        annotate_citation_structures(remaining_content, &mut citations);
55
56        // Scan for inline bibliography blocks in remaining content
57        let bibliography_blocks = scan_bibliography_blocks(remaining_content);
58
59        let frontmatter_groups = frontmatter.as_ref().and_then(|fm| fm.bibliography.clone());
60        let frontmatter_options = frontmatter.as_ref().and_then(|fm| fm.options.clone());
61        // Legacy top-level fields are superseded by their `options.*` counterparts.
62        let frontmatter_integral_name_memory = frontmatter
63            .as_ref()
64            .and_then(|fm| fm.integral_name_memory.clone())
65            .filter(|_| {
66                frontmatter_options
67                    .as_ref()
68                    .and_then(|o| o.integral_name_memory.as_ref())
69                    .is_none()
70            });
71        let frontmatter_org_abbreviation_memory = frontmatter
72            .and_then(|fm| fm.org_abbreviation_memory)
73            .filter(|_| {
74                frontmatter_options
75                    .as_ref()
76                    .and_then(|o| o.org_abbreviation_memory.as_ref())
77                    .is_none()
78            });
79        ParsedDocument {
80            citations,
81            manual_note_order,
82            manual_note_references,
83            manual_note_labels,
84            bibliography_blocks,
85            frontmatter_groups,
86            frontmatter_integral_name_memory,
87            frontmatter_org_abbreviation_memory,
88            frontmatter_options,
89            frontmatter_error,
90            body_start,
91        }
92    }
93
94    /// Convert Djot markup to HTML using jotdown after citation splicing.
95    fn finalize_html_output(&self, rendered: &str) -> String {
96        djot_to_html(rendered)
97    }
98}
99
100/// Determine the citation placement within the document.
101fn citation_placement(
102    start: usize,
103    end: usize,
104    footnote_definitions: &[FootnoteDefinitionRange],
105) -> CitationPlacement {
106    footnote_definitions
107        .iter()
108        .find(|definition| definition.content.start <= start && end <= definition.content.end)
109        .map_or(CitationPlacement::InlineProse, |definition| {
110            CitationPlacement::ManualFootnote {
111                label: definition.label.clone(),
112            }
113        })
114}
115
116/// Convert Djot markup to HTML using jotdown.
117#[must_use]
118pub fn djot_to_html(djot: &str) -> String {
119    let events = jotdown::Parser::new(djot);
120    jotdown::html::render_to_string(events)
121}
122
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// Two items in one bracket; the second carries an abbreviated locator.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[@kuhn1962; @watson1953, ch. 2]", &locale);

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.items.len(), 2);
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert_eq!(citation.items[1].id, "watson1953");
        assert_eq!(
            citation.items[1].locator,
            Some(CitationLocator::single(LocatorType::Chapter, "2"))
        );
    }

    /// A `label: value` locator maps to the matching locator type.
    #[test]
    fn test_parse_structured_locator() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[@kuhn1962, section: 5]", &locale);

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(
            citation.items[0].locator,
            Some(CitationLocator::single(LocatorType::Section, "5"))
        );
    }

    /// Several `label: value` pairs produce a compound locator, in order.
    #[test]
    fn test_parse_compound_locator() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[@kuhn1962, chapter: 2, page: 10]", &locale);

        let (_, _, citation) = &citations[0];
        let locator = citation.items[0].locator.as_ref().unwrap();
        assert!(locator.is_compound());
        assert_eq!(locator.segments()[0].label, LocatorType::Chapter);
        assert_eq!(locator.segments()[1].label, LocatorType::Page);
    }

    /// A leading `-` suppresses the author.
    #[test]
    fn test_parse_suppress_author() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[-@kuhn1962]", &locale);

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(citation.suppress_author);
    }

    /// A leading `+` selects integral mode without suppressing the author.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[+@kuhn1962]", &locale);

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.mode, CitationMode::Integral);
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(!citation.suppress_author);
    }

    /// Bracketed text containing `;` but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let locale = Locale::en_us();
        let citations = DjotParser.parse_citations("[foo; bar]", &locale);

        assert_eq!(citations.len(), 0);
    }

    /// A citation inside a footnote definition is attributed to that note.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let locale = Locale::en_us();
        let parsed = DjotParser.parse_document("Text[^m1].\n\n[^m1]: See [@kuhn1962].", &locale);

        assert_eq!(parsed.manual_note_order, vec![String::from("m1")]);
        assert_eq!(parsed.manual_note_references.len(), 1);
        assert_eq!(parsed.citations.len(), 1);
        assert_eq!(
            parsed.citations[0].placement,
            CitationPlacement::ManualFootnote {
                label: String::from("m1")
            }
        );
    }

    /// A citation in running text is classified as inline prose.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let locale = Locale::en_us();
        let parsed = DjotParser.parse_document("Text [@kuhn1962].", &locale);

        assert_eq!(parsed.citations.len(), 1);
        assert_eq!(
            parsed.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        // The Djot adapter overrides `finalize_html_output` so that the spliced
        // markup is run through jotdown and comes back as HTML. This is specific
        // to this adapter; other parsers (e.g. MarkdownParser) hand the markup
        // back unchanged.
        let result = DjotParser.finalize_html_output("{_em_}");
        assert!(
            result.contains("<em>em</em>"),
            "unexpected output: {result}"
        );
    }
}