citum_engine/processor/document/djot/
mod.rs1pub(crate) mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13 FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14 scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// Citation parser for Djot-formatted documents.
///
/// This is a stateless unit type: all behavior lives in its
/// [`CitationParser`] implementation, so values are free to copy and compare
/// by debug output.
#[derive(Debug, Clone, Copy, Default)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24 fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25 let (frontmatter_result, remaining_content) = parse_frontmatter(content);
27 let body_start = content.len() - remaining_content.len();
28 let (frontmatter, frontmatter_error) = match frontmatter_result {
29 Ok(fm) => (fm, None),
30 Err(e) => (None, Some(e)),
31 };
32
33 let (manual_note_references, manual_note_labels, footnote_definitions) =
34 scan_manual_notes(remaining_content);
35
36 let mut manual_note_order = Vec::new();
37 let mut seen_manual = HashSet::new();
38 for note in &manual_note_references {
39 if seen_manual.insert(note.label.clone()) {
40 manual_note_order.push(note.label.clone());
41 }
42 }
43
44 let mut citations: Vec<_> = find_citations(remaining_content, locale)
45 .into_iter()
46 .map(|(start, end, citation)| super::ParsedCitation {
47 start,
48 end,
49 citation,
50 placement: citation_placement(start, end, &footnote_definitions),
51 structure: Default::default(),
52 })
53 .collect();
54 annotate_citation_structures(remaining_content, &mut citations);
55
56 let bibliography_blocks = scan_bibliography_blocks(remaining_content);
58
59 let frontmatter_groups = frontmatter.as_ref().and_then(|fm| fm.bibliography.clone());
60 let frontmatter_options = frontmatter.as_ref().and_then(|fm| fm.options.clone());
61 let frontmatter_integral_name_memory = frontmatter
63 .as_ref()
64 .and_then(|fm| fm.integral_name_memory.clone())
65 .filter(|_| {
66 frontmatter_options
67 .as_ref()
68 .and_then(|o| o.integral_name_memory.as_ref())
69 .is_none()
70 });
71 let frontmatter_org_abbreviation_memory = frontmatter
72 .and_then(|fm| fm.org_abbreviation_memory)
73 .filter(|_| {
74 frontmatter_options
75 .as_ref()
76 .and_then(|o| o.org_abbreviation_memory.as_ref())
77 .is_none()
78 });
79 ParsedDocument {
80 citations,
81 manual_note_order,
82 manual_note_references,
83 manual_note_labels,
84 bibliography_blocks,
85 frontmatter_groups,
86 frontmatter_integral_name_memory,
87 frontmatter_org_abbreviation_memory,
88 frontmatter_options,
89 frontmatter_error,
90 body_start,
91 }
92 }
93
94 fn finalize_html_output(&self, rendered: &str) -> String {
96 djot_to_html(rendered)
97 }
98}
99
100fn citation_placement(
102 start: usize,
103 end: usize,
104 footnote_definitions: &[FootnoteDefinitionRange],
105) -> CitationPlacement {
106 footnote_definitions
107 .iter()
108 .find(|definition| definition.content.start <= start && end <= definition.content.end)
109 .map_or(CitationPlacement::InlineProse, |definition| {
110 CitationPlacement::ManualFootnote {
111 label: definition.label.clone(),
112 }
113 })
114}
115
116#[must_use]
118pub fn djot_to_html(djot: &str) -> String {
119 let events = jotdown::Parser::new(djot);
120 jotdown::html::render_to_string(events)
121}
122
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// Two keys in one bracket form a single citation with two items, and the
    /// trailing `ch. 2` becomes a chapter locator on the second item.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962; @watson1953, ch. 2]", &locale);

        assert_eq!(found.len(), 1);
        let (_, _, cite) = &found[0];
        assert_eq!(cite.items.len(), 2);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert_eq!(cite.items[1].id, "watson1953");

        let expected_locator = Some(CitationLocator::single(LocatorType::Chapter, "2"));
        assert_eq!(cite.items[1].locator, expected_locator);
    }

    /// The `label: value` locator form is parsed into a typed locator.
    #[test]
    fn test_parse_structured_locator() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962, section: 5]", &locale);

        assert_eq!(found.len(), 1);
        let (_, _, cite) = &found[0];
        let expected_locator = Some(CitationLocator::single(LocatorType::Section, "5"));
        assert_eq!(cite.items[0].locator, expected_locator);
    }

    /// Several `label: value` pairs produce a compound locator whose segments
    /// keep their written order.
    #[test]
    fn test_parse_compound_locator() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962, chapter: 2, page: 10]", &locale);

        let (_, _, cite) = &found[0];
        let compound = cite.items[0]
            .locator
            .as_ref()
            .expect("a locator should have been parsed");
        assert!(compound.is_compound());
        assert_eq!(compound.segments()[0].label, LocatorType::Chapter);
        assert_eq!(compound.segments()[1].label, LocatorType::Page);
    }

    /// A leading `-` before the key sets `suppress_author`.
    #[test]
    fn test_parse_suppress_author() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[-@kuhn1962]", &locale);

        assert_eq!(found.len(), 1);
        let (_, _, cite) = &found[0];
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(cite.suppress_author);
    }

    /// A leading `+` before the key selects integral mode without suppressing
    /// the author.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[+@kuhn1962]", &locale);

        assert_eq!(found.len(), 1);
        let (_, _, cite) = &found[0];
        assert_eq!(cite.mode, CitationMode::Integral);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(!cite.suppress_author);
    }

    /// Bracketed text containing a semicolon but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[foo; bar]", &locale);

        assert_eq!(found.len(), 0);
    }

    /// A citation inside a footnote definition is attributed to that
    /// footnote's label, and the label is recorded in the manual note order.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let locale = Locale::en_us();
        let document =
            DjotParser.parse_document("Text[^m1].\n\n[^m1]: See [@kuhn1962].", &locale);

        assert_eq!(document.manual_note_order, vec!["m1".to_string()]);
        assert_eq!(document.manual_note_references.len(), 1);
        assert_eq!(document.citations.len(), 1);

        let expected_placement = CitationPlacement::ManualFootnote {
            label: "m1".to_string(),
        };
        assert_eq!(document.citations[0].placement, expected_placement);
    }

    /// A citation in running text is classified as inline prose.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let locale = Locale::en_us();
        let document = DjotParser.parse_document("Text [@kuhn1962].", &locale);

        assert_eq!(document.citations.len(), 1);
        assert_eq!(
            document.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    /// `finalize_html_output` turns Djot emphasis markup into an `<em>` element.
    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        let html = DjotParser.finalize_html_output("{_em_}");

        assert!(html.contains("<em>em</em>"), "unexpected output: {html}");
    }
}