1use std::collections::HashSet;
4
5use eure_document::document::{EureDocument, NodeId};
6use eure_document::map::Map;
7use regex::Regex;
8use std::sync::LazyLock;
9
10use crate::document::{EumdDocument, Section};
11use crate::error::{ReferenceError, ReferenceType};
12use crate::reference::extract_references;
13
14#[derive(Debug, Default)]
16pub struct CheckResult {
17 pub errors: Vec<ReferenceError>,
19}
20
21impl CheckResult {
22 pub fn is_ok(&self) -> bool {
24 self.errors.is_empty()
25 }
26}
27
28static BIBTEX_ENTRY_PATTERN: LazyLock<Regex> =
30 LazyLock::new(|| Regex::new(r"@\w+\{([^,\s]+)").expect("invalid bibtex entry regex"));
31
32fn extract_bibtex_keys(bibtex: &str) -> HashSet<String> {
34 BIBTEX_ENTRY_PATTERN
35 .captures_iter(bibtex)
36 .map(|cap| cap[1].to_string())
37 .collect()
38}
39
40fn collect_section_keys(sections: &Map<String, Section>, keys: &mut HashSet<String>) {
42 for (key, section) in sections.iter() {
43 keys.insert(key.clone());
44 collect_section_keys(§ion.sections, keys);
45 }
46}
47
48pub fn check_references(doc: &EumdDocument) -> CheckResult {
50 let mut result = CheckResult::default();
51
52 let cite_keys: HashSet<String> = doc
54 .cites
55 .as_ref()
56 .map(|c| extract_bibtex_keys(c))
57 .unwrap_or_default();
58
59 let footnote_keys: HashSet<String> = doc.footnotes.iter().map(|(k, _)| k.clone()).collect();
60
61 let mut section_keys = HashSet::new();
62 collect_section_keys(&doc.sections, &mut section_keys);
63
64 check_content_simple(
66 doc.description.as_deref(),
67 "in description",
68 &cite_keys,
69 &footnote_keys,
70 §ion_keys,
71 &mut result,
72 );
73
74 check_content_simple(
75 doc.intro.as_deref(),
76 "in intro",
77 &cite_keys,
78 &footnote_keys,
79 §ion_keys,
80 &mut result,
81 );
82
83 check_sections_simple(
85 &doc.sections,
86 "",
87 &cite_keys,
88 &footnote_keys,
89 §ion_keys,
90 &mut result,
91 );
92
93 for (key, footnote) in doc.footnotes.iter() {
95 check_content_simple(
96 Some(&footnote.content),
97 &format!("in footnote '{key}'"),
98 &cite_keys,
99 &footnote_keys,
100 §ion_keys,
101 &mut result,
102 );
103 }
104
105 result
106}
107
108fn check_content_simple(
109 content: Option<&str>,
110 location: &str,
111 cite_keys: &HashSet<String>,
112 footnote_keys: &HashSet<String>,
113 section_keys: &HashSet<String>,
114 result: &mut CheckResult,
115) {
116 let Some(content) = content else { return };
117
118 for reference in extract_references(content) {
119 let is_valid = match reference.ref_type {
120 ReferenceType::Cite => cite_keys.contains(&reference.key),
121 ReferenceType::Footnote => footnote_keys.contains(&reference.key),
122 ReferenceType::Section => section_keys.contains(&reference.key),
123 };
124
125 if !is_valid {
126 result.errors.push(ReferenceError::new(
127 reference.ref_type,
128 reference.key,
129 location.to_string(),
130 ));
131 }
132 }
133}
134
135fn check_sections_simple(
136 sections: &Map<String, Section>,
137 path: &str,
138 cite_keys: &HashSet<String>,
139 footnote_keys: &HashSet<String>,
140 section_keys: &HashSet<String>,
141 result: &mut CheckResult,
142) {
143 for (key, section) in sections.iter() {
144 let current_path = if path.is_empty() {
145 key.clone()
146 } else {
147 format!("{path}.{key}")
148 };
149
150 check_content_simple(
152 section.header.as_deref(),
153 &format!("in section '{current_path}' header"),
154 cite_keys,
155 footnote_keys,
156 section_keys,
157 result,
158 );
159
160 check_content_simple(
162 section.body.as_deref(),
163 &format!("in section '{current_path}'"),
164 cite_keys,
165 footnote_keys,
166 section_keys,
167 result,
168 );
169
170 check_sections_simple(
172 §ion.sections,
173 ¤t_path,
174 cite_keys,
175 footnote_keys,
176 section_keys,
177 result,
178 );
179 }
180}
181
182struct CheckContext<'a> {
188 raw_doc: &'a EureDocument,
189 cite_keys: HashSet<String>,
190 footnote_keys: HashSet<String>,
191 section_keys: HashSet<String>,
192 result: CheckResult,
193}
194
195impl<'a> CheckContext<'a> {
196 fn new(eumd_doc: &EumdDocument, raw_doc: &'a EureDocument) -> Self {
197 let cite_keys: HashSet<String> = eumd_doc
198 .cites
199 .as_ref()
200 .map(|c| extract_bibtex_keys(c))
201 .unwrap_or_default();
202
203 let footnote_keys: HashSet<String> =
204 eumd_doc.footnotes.iter().map(|(k, _)| k.clone()).collect();
205
206 let mut section_keys = HashSet::new();
207 collect_section_keys(&eumd_doc.sections, &mut section_keys);
208
209 CheckContext {
210 raw_doc,
211 cite_keys,
212 footnote_keys,
213 section_keys,
214 result: CheckResult::default(),
215 }
216 }
217
218 fn check_content(&mut self, content: &str, location: &str, node_id: NodeId) {
219 let content_offset = get_code_block_content_offset(self.raw_doc, node_id);
221
222 for reference in extract_references(content) {
223 let is_valid = match reference.ref_type {
224 ReferenceType::Cite => self.cite_keys.contains(&reference.key),
225 ReferenceType::Footnote => self.footnote_keys.contains(&reference.key),
226 ReferenceType::Section => self.section_keys.contains(&reference.key),
227 };
228
229 if !is_valid {
230 self.result.errors.push(ReferenceError::with_span(
231 reference.ref_type,
232 reference.key,
233 location.to_string(),
234 node_id,
235 content_offset + reference.offset,
236 reference.len,
237 ));
238 }
239 }
240 }
241
242 fn check_sections(
243 &mut self,
244 sections: &Map<String, Section>,
245 path: &str,
246 sections_node_id: NodeId,
247 ) {
248 let sections_node = self.raw_doc.node(sections_node_id);
249 let Some(sections_map) = sections_node.as_map() else {
250 return;
251 };
252
253 for (key, section) in sections.iter() {
254 let current_path = if path.is_empty() {
255 key.clone()
256 } else {
257 format!("{path}.{key}")
258 };
259
260 let Some(section_node_id) = sections_map.get_node_id(&key.clone().into()) else {
261 continue;
262 };
263
264 let section_node = self.raw_doc.node(section_node_id);
265 let Some(section_map) = section_node.as_map() else {
266 continue;
267 };
268
269 if let Some(ref header) = section.header
271 && let Some(header_node_id) = section_map.get_node_id(&"header".into())
272 {
273 self.check_content(
274 header,
275 &format!("in section '{current_path}' header"),
276 header_node_id,
277 );
278 }
279
280 if let Some(ref body) = section.body
282 && let Some(body_node_id) = section_map.get_node_id(&"body".into())
283 {
284 self.check_content(body, &format!("in section '{current_path}'"), body_node_id);
285 }
286
287 if let Some(nested_sections_id) = section_map.get_node_id(&"sections".into()) {
289 self.check_sections(§ion.sections, ¤t_path, nested_sections_id);
290 }
291 }
292 }
293}
294
295fn get_code_block_content_offset(_raw_doc: &EureDocument, _node_id: NodeId) -> u32 {
297 0
302}
303
304pub fn check_references_with_spans(eumd_doc: &EumdDocument, raw_doc: &EureDocument) -> CheckResult {
306 let mut ctx = CheckContext::new(eumd_doc, raw_doc);
307
308 let root_id = raw_doc.get_root_id();
309 let root = raw_doc.node(root_id);
310
311 let Some(map) = root.as_map() else {
312 return ctx.result;
313 };
314
315 if let Some(ref content) = eumd_doc.description
317 && let Some(node_id) = map.get_node_id(&"description".into())
318 {
319 ctx.check_content(content, "in description", node_id);
320 }
321
322 if let Some(ref content) = eumd_doc.intro
324 && let Some(node_id) = map.get_node_id(&"intro".into())
325 {
326 ctx.check_content(content, "in intro", node_id);
327 }
328
329 if let Some(sections_node_id) = map.get_node_id(&"sections".into()) {
331 ctx.check_sections(&eumd_doc.sections, "", sections_node_id);
332 }
333
334 if let Some(footnotes_node_id) = map.get_node_id(&"footnotes".into()) {
336 let footnotes_node = raw_doc.node(footnotes_node_id);
337 if let Some(footnotes_map) = footnotes_node.as_map() {
338 for (key, footnote) in eumd_doc.footnotes.iter() {
339 if let Some(footnote_node_id) = footnotes_map.get_node_id(&key.clone().into())
340 && let Some(content_node_id) = raw_doc
341 .node(footnote_node_id)
342 .as_map()
343 .and_then(|m| m.get_node_id(&"content".into()))
344 {
345 ctx.check_content(
346 &footnote.content,
347 &format!("in footnote '{key}'"),
348 content_node_id,
349 );
350 }
351 }
352 }
353 }
354
355 ctx.result
356}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// A source with two entries yields exactly those two citation keys.
    #[test]
    fn test_extract_bibtex_keys() {
        let bibtex = r#"
@article{knuth1984,
 author = "Donald Knuth",
 title = "Literate Programming"
}

@book{lamport1994,
 author = "Leslie Lamport"
}
"#;
        let expected = HashSet::from(["knuth1984".to_string(), "lamport1994".to_string()]);

        // Set equality covers both membership and the exact count.
        assert_eq!(extract_bibtex_keys(bibtex), expected);
    }
}