Skip to main content

citum_engine/processor/document/djot/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Djot document parsing and HTML conversion adapter.
7
8mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13    FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14    scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// A parser for Djot citations using winnow.
///
/// Syntax: `[@key]`, `[+@key]`, or `[-@key]`. Multi-cites: `[@key1; @key2]`.
///
/// The parser is a field-less unit struct, so it derives the cheap common
/// traits (`Clone`, `Copy`, `Debug`) alongside `Default`; this lets it be
/// embedded in types that themselves derive `Debug` or `Clone`.
#[derive(Clone, Copy, Debug, Default)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24    fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25        // Try to parse frontmatter and get remaining content
26        let (frontmatter, remaining_content) = parse_frontmatter(content);
27        let body_start = content.len() - remaining_content.len();
28
29        let (manual_note_references, manual_note_labels, footnote_definitions) =
30            scan_manual_notes(remaining_content);
31
32        let mut manual_note_order = Vec::new();
33        let mut seen_manual = HashSet::new();
34        for note in &manual_note_references {
35            if seen_manual.insert(note.label.clone()) {
36                manual_note_order.push(note.label.clone());
37            }
38        }
39
40        let mut citations: Vec<_> = find_citations(remaining_content, locale)
41            .into_iter()
42            .map(|(start, end, citation)| super::ParsedCitation {
43                start,
44                end,
45                citation,
46                placement: citation_placement(start, end, &footnote_definitions),
47                structure: Default::default(),
48            })
49            .collect();
50        annotate_citation_structures(remaining_content, &mut citations);
51
52        // Scan for inline bibliography blocks in remaining content
53        let bibliography_blocks = scan_bibliography_blocks(remaining_content);
54
55        ParsedDocument {
56            citations,
57            manual_note_order,
58            manual_note_references,
59            manual_note_labels,
60            bibliography_blocks,
61            frontmatter_groups: frontmatter
62                .as_ref()
63                .and_then(|frontmatter| frontmatter.bibliography.clone()),
64            frontmatter_integral_names: frontmatter
65                .and_then(|frontmatter| frontmatter.integral_names),
66            body_start,
67        }
68    }
69
70    /// Convert Djot markup to HTML using jotdown after citation splicing.
71    fn finalize_html_output(&self, rendered: &str) -> String {
72        djot_to_html(rendered)
73    }
74}
75
76/// Determine the citation placement within the document.
77fn citation_placement(
78    start: usize,
79    end: usize,
80    footnote_definitions: &[FootnoteDefinitionRange],
81) -> CitationPlacement {
82    footnote_definitions
83        .iter()
84        .find(|definition| definition.content.start <= start && end <= definition.content.end)
85        .map_or(CitationPlacement::InlineProse, |definition| {
86            CitationPlacement::ManualFootnote {
87                label: definition.label.clone(),
88            }
89        })
90}
91
92/// Convert Djot markup to HTML using jotdown.
93#[must_use]
94pub fn djot_to_html(djot: &str) -> String {
95    let events = jotdown::Parser::new(djot);
96    jotdown::html::render_to_string(events)
97}
98
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// A multi-cite yields one citation with two items; the trailing
    /// `ch. 2` becomes a chapter locator on the second item.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let parser = DjotParser;
        let content = "[@kuhn1962; @watson1953, ch. 2]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.items.len(), 2);
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert_eq!(citation.items[1].id, "watson1953");
        assert_eq!(
            citation.items[1].locator,
            Some(CitationLocator::single(LocatorType::Chapter, "2"))
        );
    }

    /// The explicit `label: value` locator form is parsed into a single
    /// section locator.
    #[test]
    fn test_parse_structured_locator() {
        let parser = DjotParser;
        let content = "[@kuhn1962, section: 5]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(
            citation.items[0].locator,
            Some(CitationLocator::single(LocatorType::Section, "5"))
        );
    }

    /// Two `label: value` pairs produce a compound locator whose segments
    /// keep their source order.
    #[test]
    fn test_parse_compound_locator() {
        let parser = DjotParser;
        let content = "[@kuhn1962, chapter: 2, page: 10]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        // Check the count before indexing, as the sibling tests do, so a
        // regression fails with a clear assertion instead of an index panic.
        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        let locator = citation.items[0].locator.as_ref().unwrap();
        assert!(locator.is_compound());
        assert_eq!(locator.segments()[0].label, LocatorType::Chapter);
        assert_eq!(locator.segments()[1].label, LocatorType::Page);
    }

    /// The `[-@key]` form sets `suppress_author` on the citation.
    #[test]
    fn test_parse_suppress_author() {
        let parser = DjotParser;
        let content = "[-@kuhn1962]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(citation.suppress_author);
    }

    /// The `[+@key]` form selects integral mode and does not suppress the
    /// author.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let parser = DjotParser;
        let content = "[+@kuhn1962]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        assert_eq!(citations.len(), 1);
        let (_, _, citation) = &citations[0];
        assert_eq!(citation.mode, CitationMode::Integral);
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(!citation.suppress_author);
    }

    /// Bracketed text with a semicolon but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let parser = DjotParser;
        let content = "[foo; bar]";
        let citations = parser.parse_citations(content, &Locale::en_us());

        assert_eq!(citations.len(), 0);
    }

    /// A citation inside a footnote definition is attributed to that
    /// footnote's label, and the label is recorded in the note order.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let parser = DjotParser;
        let content = "Text[^m1].\n\n[^m1]: See [@kuhn1962].";
        let parsed = parser.parse_document(content, &Locale::en_us());

        assert_eq!(parsed.manual_note_order, vec!["m1".to_string()]);
        assert_eq!(parsed.manual_note_references.len(), 1);
        assert_eq!(parsed.citations.len(), 1);
        assert_eq!(
            parsed.citations[0].placement,
            CitationPlacement::ManualFootnote {
                label: "m1".to_string()
            }
        );
    }

    /// A citation in running text is classified as inline prose.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let parser = DjotParser;
        let content = "Text [@kuhn1962].";
        let parsed = parser.parse_document(content, &Locale::en_us());

        assert_eq!(parsed.citations.len(), 1);
        assert_eq!(
            parsed.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    /// `finalize_html_output` renders Djot emphasis markup to an `<em>` tag.
    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        // DjotParser explicitly overrides finalize_html_output to run jotdown,
        // converting Djot markup to HTML. This is adapter-specific behavior;
        // other parsers (e.g. MarkdownParser) return markup unchanged.
        let parser = DjotParser;
        let result = parser.finalize_html_output("{_em_}");
        assert!(
            result.contains("<em>em</em>"),
            "unexpected output: {result}"
        );
    }
}
235}