citum_engine/processor/document/djot/
mod.rs1pub(crate) mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13 FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14 scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// Citation parser for Djot documents.
///
/// The type carries no state; all behavior lives in its
/// [`CitationParser`] implementation.
#[derive(Debug, Default, Clone, Copy)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24 fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25 let (frontmatter_result, remaining_content) = parse_frontmatter(content);
27 let body_start = content.len() - remaining_content.len();
28 let (frontmatter, frontmatter_error) = match frontmatter_result {
29 Ok(fm) => (fm, None),
30 Err(e) => (None, Some(e)),
31 };
32
33 let (manual_note_references, manual_note_labels, footnote_definitions) =
34 scan_manual_notes(remaining_content);
35
36 let mut manual_note_order = Vec::new();
37 let mut seen_manual = HashSet::new();
38 for note in &manual_note_references {
39 if seen_manual.insert(note.label.clone()) {
40 manual_note_order.push(note.label.clone());
41 }
42 }
43
44 let mut citations: Vec<_> = find_citations(remaining_content, locale)
45 .into_iter()
46 .map(|(start, end, citation)| super::ParsedCitation {
47 start,
48 end,
49 citation,
50 placement: citation_placement(start, end, &footnote_definitions),
51 structure: Default::default(),
52 })
53 .collect();
54 annotate_citation_structures(remaining_content, &mut citations);
55
56 let bibliography_blocks = scan_bibliography_blocks(remaining_content);
58
59 let frontmatter_groups = frontmatter.as_ref().and_then(|fm| fm.bibliography.clone());
60 let frontmatter_options = frontmatter.as_ref().and_then(|fm| fm.options.clone());
61 let frontmatter_integral_name_memory = frontmatter
63 .as_ref()
64 .and_then(|fm| fm.integral_name_memory.clone())
65 .filter(|_| {
66 frontmatter_options
67 .as_ref()
68 .and_then(|o| o.integral_name_memory.as_ref())
69 .is_none()
70 });
71 let frontmatter_org_abbreviation_memory = frontmatter
72 .and_then(|fm| fm.org_abbreviation_memory)
73 .filter(|_| {
74 frontmatter_options
75 .as_ref()
76 .and_then(|o| o.org_abbreviation_memory.as_ref())
77 .is_none()
78 });
79 ParsedDocument {
80 citations,
81 manual_note_order,
82 manual_note_references,
83 manual_note_labels,
84 bibliography_blocks,
85 frontmatter_groups,
86 frontmatter_integral_name_memory,
87 frontmatter_org_abbreviation_memory,
88 frontmatter_options,
89 frontmatter_error,
90 body_start,
91 }
92 }
93
94 fn finalize_html_output(&self, rendered: &str) -> String {
96 djot_to_html(rendered)
97 }
98
99 fn render_body_markup<F>(&self, body: &str, fmt: &F) -> String
102 where
103 F: crate::render::format::OutputFormat<Output = String>,
104 {
105 crate::render::markup::render_djot_body(body, fmt)
106 }
107}
108
109fn citation_placement(
111 start: usize,
112 end: usize,
113 footnote_definitions: &[FootnoteDefinitionRange],
114) -> CitationPlacement {
115 footnote_definitions
116 .iter()
117 .find(|definition| definition.content.start <= start && end <= definition.content.end)
118 .map_or(CitationPlacement::InlineProse, |definition| {
119 CitationPlacement::ManualFootnote {
120 label: definition.label.clone(),
121 }
122 })
123}
124
125#[must_use]
127pub fn djot_to_html(djot: &str) -> String {
128 let events = jotdown::Parser::new(djot);
129 jotdown::html::render_to_string(events)
130}
131
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// Two items in one bracket form a single citation; the trailing
    /// `ch. 2` becomes a chapter locator on the second item.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let djot = DjotParser;
        let found = djot.parse_citations("[@kuhn1962; @watson1953, ch. 2]", &Locale::en_us());

        assert_eq!(found.len(), 1);
        let (_, _, citation) = &found[0];
        assert_eq!(citation.items.len(), 2);

        let first = &citation.items[0];
        let second = &citation.items[1];
        assert_eq!(first.id, "kuhn1962");
        assert_eq!(second.id, "watson1953");
        assert_eq!(
            second.locator,
            Some(CitationLocator::single(LocatorType::Chapter, "2"))
        );
    }

    /// A `label: value` locator is parsed into the matching locator type.
    #[test]
    fn test_parse_structured_locator() {
        let djot = DjotParser;
        let found = djot.parse_citations("[@kuhn1962, section: 5]", &Locale::en_us());

        assert_eq!(found.len(), 1);
        let (_, _, citation) = &found[0];
        let expected = Some(CitationLocator::single(LocatorType::Section, "5"));
        assert_eq!(citation.items[0].locator, expected);
    }

    /// Two `label: value` pairs on one item yield a compound locator whose
    /// segments keep their written order.
    #[test]
    fn test_parse_compound_locator() {
        let djot = DjotParser;
        let found = djot.parse_citations("[@kuhn1962, chapter: 2, page: 10]", &Locale::en_us());

        let (_, _, citation) = &found[0];
        let locator = citation.items[0].locator.as_ref().unwrap();
        assert!(locator.is_compound());

        let segments = locator.segments();
        assert_eq!(segments[0].label, LocatorType::Chapter);
        assert_eq!(segments[1].label, LocatorType::Page);
    }

    /// A leading `-` before `@` sets `suppress_author`.
    #[test]
    fn test_parse_suppress_author() {
        let djot = DjotParser;
        let found = djot.parse_citations("[-@kuhn1962]", &Locale::en_us());

        assert_eq!(found.len(), 1);
        let (_, _, citation) = &found[0];
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(citation.suppress_author);
    }

    /// A leading `+` before `@` selects integral mode and leaves
    /// `suppress_author` unset.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let djot = DjotParser;
        let found = djot.parse_citations("[+@kuhn1962]", &Locale::en_us());

        assert_eq!(found.len(), 1);
        let (_, _, citation) = &found[0];
        assert_eq!(citation.mode, CitationMode::Integral);
        assert_eq!(citation.items[0].id, "kuhn1962");
        assert!(!citation.suppress_author);
    }

    /// Bracketed text with a semicolon but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let djot = DjotParser;
        let found = djot.parse_citations("[foo; bar]", &Locale::en_us());

        assert_eq!(found.len(), 0);
    }

    /// A citation inside a footnote definition is placed in that manual
    /// footnote, and the referenced label is recorded in note order.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let djot = DjotParser;
        let document =
            djot.parse_document("Text[^m1].\n\n[^m1]: See [@kuhn1962].", &Locale::en_us());

        assert_eq!(document.manual_note_order, vec!["m1".to_string()]);
        assert_eq!(document.manual_note_references.len(), 1);
        assert_eq!(document.citations.len(), 1);

        let expected_placement = CitationPlacement::ManualFootnote {
            label: "m1".to_string(),
        };
        assert_eq!(document.citations[0].placement, expected_placement);
    }

    /// A citation in running text is classified as inline prose.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let djot = DjotParser;
        let document = djot.parse_document("Text [@kuhn1962].", &Locale::en_us());

        assert_eq!(document.citations.len(), 1);
        assert_eq!(
            document.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    /// Expect the Djot emphasis markup to come back as an HTML `<em>` element.
    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        let djot = DjotParser;
        let result = djot.finalize_html_output("{_em_}");
        assert!(
            result.contains("<em>em</em>"),
            "unexpected output: {result}"
        );
    }
}