citum_engine/processor/document/djot/
mod.rs1mod parsing;
9
10use super::{BibliographyBlock, CitationParser, CitationPlacement, ParsedDocument};
11use citum_schema::locale::Locale;
12use parsing::{
13 FootnoteDefinitionRange, annotate_citation_structures, find_citations, parse_frontmatter,
14 scan_bibliography_blocks, scan_manual_notes,
15};
16use std::collections::HashSet;
17
/// Citation parser for Djot documents.
///
/// The type carries no state; all behavior is provided through its
/// [`CitationParser`] implementation.
#[derive(Debug, Clone, Default)]
pub struct DjotParser;
22
23impl CitationParser for DjotParser {
24 fn parse_document(&self, content: &str, locale: &Locale) -> ParsedDocument {
25 let (frontmatter, remaining_content) = parse_frontmatter(content);
27 let body_start = content.len() - remaining_content.len();
28
29 let (manual_note_references, manual_note_labels, footnote_definitions) =
30 scan_manual_notes(remaining_content);
31
32 let mut manual_note_order = Vec::new();
33 let mut seen_manual = HashSet::new();
34 for note in &manual_note_references {
35 if seen_manual.insert(note.label.clone()) {
36 manual_note_order.push(note.label.clone());
37 }
38 }
39
40 let mut citations: Vec<_> = find_citations(remaining_content, locale)
41 .into_iter()
42 .map(|(start, end, citation)| super::ParsedCitation {
43 start,
44 end,
45 citation,
46 placement: citation_placement(start, end, &footnote_definitions),
47 structure: Default::default(),
48 })
49 .collect();
50 annotate_citation_structures(remaining_content, &mut citations);
51
52 let bibliography_blocks = scan_bibliography_blocks(remaining_content);
54
55 ParsedDocument {
56 citations,
57 manual_note_order,
58 manual_note_references,
59 manual_note_labels,
60 bibliography_blocks,
61 frontmatter_groups: frontmatter
62 .as_ref()
63 .and_then(|frontmatter| frontmatter.bibliography.clone()),
64 frontmatter_integral_name_memory: frontmatter
65 .and_then(|frontmatter| frontmatter.integral_name_memory),
66 body_start,
67 }
68 }
69
70 fn finalize_html_output(&self, rendered: &str) -> String {
72 djot_to_html(rendered)
73 }
74}
75
76fn citation_placement(
78 start: usize,
79 end: usize,
80 footnote_definitions: &[FootnoteDefinitionRange],
81) -> CitationPlacement {
82 footnote_definitions
83 .iter()
84 .find(|definition| definition.content.start <= start && end <= definition.content.end)
85 .map_or(CitationPlacement::InlineProse, |definition| {
86 CitationPlacement::ManualFootnote {
87 label: definition.label.clone(),
88 }
89 })
90}
91
92#[must_use]
94pub fn djot_to_html(djot: &str) -> String {
95 let events = jotdown::Parser::new(djot);
96 jotdown::html::render_to_string(events)
97}
98
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    use super::*;
    use citum_schema::citation::{CitationLocator, CitationMode, LocatorType};

    /// A bracket with two keys yields one citation with two items, and the
    /// trailing locator is attached to the second item.
    #[test]
    fn test_parse_multi_cite_with_locators() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962; @watson1953, ch. 2]", &locale);
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.items.len(), 2);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert_eq!(cite.items[1].id, "watson1953");

        let expected_locator = Some(CitationLocator::single(LocatorType::Chapter, "2"));
        assert_eq!(cite.items[1].locator, expected_locator);
    }

    /// The `label: value` locator form is parsed into a single typed locator.
    #[test]
    fn test_parse_structured_locator() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962, section: 5]", &locale);
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        let expected_locator = Some(CitationLocator::single(LocatorType::Section, "5"));
        assert_eq!(cite.items[0].locator, expected_locator);
    }

    /// Two `label: value` pairs produce a compound locator whose segments
    /// keep their source order.
    #[test]
    fn test_parse_compound_locator() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[@kuhn1962, chapter: 2, page: 10]", &locale);

        let (_, _, cite) = &found[0];
        let locator = cite.items[0]
            .locator
            .as_ref()
            .expect("compound citation should carry a locator");
        assert!(locator.is_compound());

        let segments = locator.segments();
        assert_eq!(segments[0].label, LocatorType::Chapter);
        assert_eq!(segments[1].label, LocatorType::Page);
    }

    /// A leading `-` before the key sets `suppress_author`.
    #[test]
    fn test_parse_suppress_author() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[-@kuhn1962]", &locale);
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(cite.suppress_author);
    }

    /// A leading `+` before the key selects integral mode without
    /// suppressing the author.
    #[test]
    fn test_parse_bracketed_integral_citation() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[+@kuhn1962]", &locale);
        assert_eq!(found.len(), 1);

        let (_, _, cite) = &found[0];
        assert_eq!(cite.mode, CitationMode::Integral);
        assert_eq!(cite.items[0].id, "kuhn1962");
        assert!(!cite.suppress_author);
    }

    /// A bracket containing a semicolon but no `@key` is not a citation.
    #[test]
    fn test_parse_semicolon_without_citation() {
        let locale = Locale::en_us();
        let found = DjotParser.parse_citations("[foo; bar]", &locale);

        assert_eq!(found.len(), 0);
    }

    /// A citation inside a footnote definition is placed in that manual
    /// footnote, and the note label is recorded once.
    #[test]
    fn test_parse_document_tracks_manual_footnotes() {
        let locale = Locale::en_us();
        let document = DjotParser.parse_document("Text[^m1].\n\n[^m1]: See [@kuhn1962].", &locale);

        assert_eq!(document.manual_note_order, vec![String::from("m1")]);
        assert_eq!(document.manual_note_references.len(), 1);
        assert_eq!(document.citations.len(), 1);

        let expected_placement = CitationPlacement::ManualFootnote {
            label: String::from("m1"),
        };
        assert_eq!(document.citations[0].placement, expected_placement);
    }

    /// A citation in ordinary prose is placed inline.
    #[test]
    fn test_parse_document_marks_prose_citations_as_inline() {
        let locale = Locale::en_us();
        let document = DjotParser.parse_document("Text [@kuhn1962].", &locale);

        assert_eq!(document.citations.len(), 1);
        assert_eq!(
            document.citations[0].placement,
            CitationPlacement::InlineProse
        );
    }

    /// Finalizing HTML output turns Djot emphasis markup into an `<em>` element.
    #[test]
    fn test_djot_finalize_html_output_converts_to_html() {
        let result = DjotParser.finalize_html_output("{_em_}");
        assert!(
            result.contains("<em>em</em>"),
            "unexpected output: {result}"
        );
    }
}