1use std::collections::HashMap;
2use std::fs;
3
4use rdx_ast::*;
5
6use crate::{Transform, synthetic_pos};
7
/// Transform that rewrites citation nodes as formatted inline text and adds a
/// `Bibliography` component listing the cited entries.
pub struct CitationResolve {
    /// Bibliography entries, looked up by citation key.
    pub entries: HashMap<String, BibEntry>,
    /// Style used for both inline citations and bibliography items.
    pub style: CitationStyle,
}
23
/// How citations are rendered inline and in the bibliography.
///
/// The enum is a plain fieldless tag, so it is `Copy` and `Eq` in addition to
/// `Clone`/`PartialEq`; it can be passed by value and compared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CitationStyle {
    /// Parenthesised author/year citations, e.g. `(Smith, 2024)`.
    AuthorYear,
    /// Bracketed numbers in order of first citation, e.g. `[1, 3]`.
    Numeric,
}
31
/// One bibliography record, as produced by `parse_bib` or built by hand.
#[derive(Debug, Clone)]
pub struct BibEntry {
    /// Citation key; `parse_bib` also uses it as the key of the returned map.
    pub key: String,
    /// Entry type without the leading `@`; `parse_bib` stores it lowercased.
    pub entry_type: String,
    /// Author names in the order they were listed.
    pub authors: Vec<String>,
    /// Title; empty when `parse_bib` found no `title` field.
    pub title: String,
    /// Year as text; empty when `parse_bib` found no `year` field.
    pub year: String,
    /// Value of the `journal` field, if present.
    pub journal: Option<String>,
    /// Value of the `publisher` field, if present.
    pub publisher: Option<String>,
    /// Value of the `volume` field, if present.
    pub volume: Option<String>,
    /// Value of the `pages` field, if present.
    pub pages: Option<String>,
    /// Value of the `url` field, if present.
    pub url: Option<String>,
    /// Value of the `doi` field, if present.
    pub doi: Option<String>,
}
47
48impl CitationResolve {
49 pub fn from_bib_file(path: &str, style: CitationStyle) -> Result<Self, String> {
51 let content =
52 fs::read_to_string(path).map_err(|e| format!("cannot read {}: {}", path, e))?;
53 let entries = parse_bib(&content);
54 Ok(CitationResolve { entries, style })
55 }
56
57 pub fn new(entries: HashMap<String, BibEntry>, style: CitationStyle) -> Self {
59 CitationResolve { entries, style }
60 }
61
62 fn format_inline(&self, keys: &[CitationKey], key_order: &[String]) -> String {
63 match self.style {
64 CitationStyle::AuthorYear => {
65 let parts: Vec<String> = keys
66 .iter()
67 .map(|k| {
68 if let Some(entry) = self.entries.get(&k.id) {
69 let author = if entry.authors.is_empty() {
70 k.id.clone()
71 } else if entry.authors.len() == 1 {
72 surname(&entry.authors[0])
73 } else if entry.authors.len() == 2 {
74 format!(
75 "{} & {}",
76 surname(&entry.authors[0]),
77 surname(&entry.authors[1])
78 )
79 } else {
80 format!("{} et al.", surname(&entry.authors[0]))
81 };
82 let mut s = format!("{}, {}", author, entry.year);
83 if let Some(ref loc) = k.locator {
84 s.push_str(&format!(", {}", loc));
85 }
86 s
87 } else {
88 format!("{}?", k.id)
89 }
90 })
91 .collect();
92 format!("({})", parts.join("; "))
93 }
94 CitationStyle::Numeric => {
95 let nums: Vec<String> = keys
96 .iter()
97 .map(|k| {
98 let idx = key_order.iter().position(|x| x == &k.id);
99 match idx {
100 Some(i) => format!("{}", i + 1),
101 None => "?".to_string(),
102 }
103 })
104 .collect();
105 format!("[{}]", nums.join(", "))
106 }
107 }
108 }
109
110 fn format_bib_entry(&self, entry: &BibEntry, number: Option<usize>) -> String {
111 let mut s = String::new();
112 if let Some(n) = number {
113 s.push_str(&format!("[{}] ", n));
114 }
115 if !entry.authors.is_empty() {
116 s.push_str(&entry.authors.join(", "));
117 s.push_str(". ");
118 }
119 if !entry.title.is_empty() {
120 s.push_str(&format!("\"{}\"", entry.title));
121 s.push_str(". ");
122 }
123 if let Some(ref j) = entry.journal {
124 s.push_str(&format!("*{}*", j));
125 if let Some(ref v) = entry.volume {
126 s.push_str(&format!(", {}", v));
127 }
128 if let Some(ref p) = entry.pages {
129 s.push_str(&format!(", pp. {}", p));
130 }
131 s.push_str(". ");
132 } else if let Some(ref p) = entry.publisher {
133 s.push_str(&format!("{}, ", p));
134 }
135 s.push_str(&format!("{}.", entry.year));
136 if let Some(ref doi) = entry.doi {
137 s.push_str(&format!(" doi:{}", doi));
138 }
139 s
140 }
141}
142
143impl Transform for CitationResolve {
144 fn name(&self) -> &str {
145 "citation-resolve"
146 }
147
148 fn transform(&self, root: &mut Root, _source: &str) {
149 let mut key_order: Vec<String> = Vec::new();
151 collect_citation_keys(&root.children, &mut key_order);
152
153 resolve_citations(&mut root.children, self, &key_order);
155
156 if key_order.is_empty() {
158 return;
159 }
160 let bib_node = self.build_bibliography(&key_order);
161
162 let placeholder = root.children.iter().position(|n| {
164 matches!(n, Node::Component(c) if c.name == "Bibliography" && c.children.is_empty())
165 });
166 if let Some(idx) = placeholder {
167 root.children[idx] = bib_node;
168 } else {
169 root.children.push(bib_node);
170 }
171 }
172}
173
174impl CitationResolve {
175 fn build_bibliography(&self, key_order: &[String]) -> Node {
176 let pos = synthetic_pos();
177 let mut items = Vec::new();
178
179 for (i, key) in key_order.iter().enumerate() {
180 if let Some(entry) = self.entries.get(key) {
181 let number = match self.style {
182 CitationStyle::Numeric => Some(i + 1),
183 CitationStyle::AuthorYear => None,
184 };
185 let text = self.format_bib_entry(entry, number);
186 items.push(Node::ListItem(StandardBlockNode {
187 depth: None,
188 ordered: Some(true),
189 checked: None,
190 id: Some(format!("bib:{}", key)),
191 children: vec![Node::Paragraph(StandardBlockNode {
192 depth: None,
193 ordered: None,
194 checked: None,
195 id: None,
196 children: vec![Node::Text(TextNode {
197 value: text,
198 position: pos.clone(),
199 })],
200 position: pos.clone(),
201 })],
202 position: pos.clone(),
203 }));
204 }
205 }
206
207 Node::Component(ComponentNode {
208 name: "Bibliography".to_string(),
209 is_inline: false,
210 attributes: vec![],
211 children: vec![
212 Node::Heading(StandardBlockNode {
213 depth: Some(2),
214 ordered: None,
215 checked: None,
216 id: Some("references".to_string()),
217 children: vec![Node::Text(TextNode {
218 value: "References".to_string(),
219 position: pos.clone(),
220 })],
221 position: pos.clone(),
222 }),
223 Node::List(StandardBlockNode {
224 depth: None,
225 ordered: Some(true),
226 checked: None,
227 id: None,
228 children: items,
229 position: pos.clone(),
230 }),
231 ],
232 raw_content: String::new(),
233 position: pos,
234 })
235 }
236}
237
238fn collect_citation_keys(nodes: &[Node], order: &mut Vec<String>) {
239 for node in nodes {
240 if let Node::Citation(c) = node {
241 for key in &c.keys {
242 if !order.contains(&key.id) {
243 order.push(key.id.clone());
244 }
245 }
246 }
247 if let Some(children) = node.children() {
248 collect_citation_keys(children, order);
249 }
250 }
251}
252
253fn resolve_citations(nodes: &mut [Node], resolver: &CitationResolve, key_order: &[String]) {
254 for node in nodes.iter_mut() {
255 if let Node::Citation(c) = node {
256 let text = resolver.format_inline(&c.keys, key_order);
257 *node = Node::Text(TextNode {
258 value: text,
259 position: c.position.clone(),
260 });
261 continue;
262 }
263 if let Some(children) = node.children_mut() {
264 resolve_citations(children, resolver, key_order);
265 }
266 }
267}
268
/// Extracts the surname used in author-year citations.
///
/// * `{World Health Organization}`: a name wrapped in one brace group is a
///   literal (e.g. a corporate author) and is returned whole, without braces.
/// * `Last, First`: the part before the first comma.
/// * `First Last`: the last whitespace-separated word.
///
/// Surrounding whitespace is ignored; an empty name yields an empty string.
fn surname(name: &str) -> String {
    let name = name.trim();

    if let Some(inner) = name
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    {
        // Only treat it as one literal when the outer braces enclose the
        // whole name, i.e. the inside is balanced (`{A} {B}` is not).
        let mut depth = 0usize;
        let balanced = inner.chars().all(|c| match c {
            '{' => {
                depth += 1;
                true
            }
            '}' => {
                if depth == 0 {
                    false
                } else {
                    depth -= 1;
                    true
                }
            }
            _ => true,
        }) && depth == 0;
        if balanced {
            return inner.trim().to_string();
        }
    }

    if let Some((family, _)) = name.split_once(',') {
        return family.trim().to_string();
    }

    name.split_whitespace()
        .next_back()
        .unwrap_or(name)
        .to_string()
}
279
/// Parses BibTeX source text into a map from citation key to [`BibEntry`].
///
/// The parser is lenient and never fails: text outside entries is skipped,
/// `%` starts a comment that runs to the end of the line, and `@string`,
/// `@preamble` and `@comment` blocks are skipped without being interpreted.
/// Entries without a key are dropped. When a key occurs more than once, the
/// last entry wins.
pub fn parse_bib(input: &str) -> HashMap<String, BibEntry> {
    let mut entries = HashMap::new();
    let mut chars = input.chars().peekable();

    while let Some(&ch) = chars.peek() {
        // Line comment: drop everything up to (not including) the newline.
        if ch == '%' {
            while chars.peek().is_some_and(|&c| c != '\n') {
                chars.next();
            }
            continue;
        }
        if ch == '@' {
            // Consume the `@`, then read the entry type.
            chars.next();
            let mut entry_type = String::new();
            while chars.peek().is_some_and(|c| c.is_ascii_alphanumeric()) {
                entry_type.push(chars.next().unwrap());
            }
            let entry_type_lower = entry_type.to_ascii_lowercase();

            // Non-entry blocks carry no bibliography data; skip them whole.
            if matches!(entry_type_lower.as_str(), "string" | "preamble" | "comment") {
                skip_braced_block(&mut chars);
                continue;
            }

            skip_ws(&mut chars);
            // An entry body must open with `{` or `(`; otherwise the `@...`
            // was not an entry and scanning resumes after the consumed char.
            let opener = chars.next();
            if opener != Some('{') && opener != Some('(') {
                continue;
            }

            // Citation key: everything up to the first `,` or closing delimiter.
            let mut key = String::new();
            while chars
                .peek()
                .is_some_and(|c| *c != ',' && *c != '}' && *c != ')')
            {
                key.push(chars.next().unwrap());
            }
            if chars.peek() == Some(&',') {
                chars.next();
            }
            let key = key.trim().to_string();
            if key.is_empty() {
                // Key-less entry: discard the rest of its body.
                skip_braced_block_remaining(&mut chars);
                continue;
            }

            // Read `name = value` pairs until the closing delimiter or EOF.
            let mut fields: HashMap<String, String> = HashMap::new();
            loop {
                skip_ws(&mut chars);
                match chars.peek() {
                    None | Some('}') | Some(')') => {
                        chars.next();
                        break;
                    }
                    _ => {}
                }

                let mut field_name = String::new();
                while chars
                    .peek()
                    .is_some_and(|c| c.is_ascii_alphanumeric() || *c == '-' || *c == '_')
                {
                    field_name.push(chars.next().unwrap());
                }
                // Field names are matched case-insensitively.
                let field_name = field_name.trim().to_ascii_lowercase();
                skip_ws(&mut chars);
                if chars.peek() == Some(&'=') {
                    chars.next();
                }
                skip_ws(&mut chars);
                // The value is always consumed, even when the name is empty,
                // so the loop makes progress on malformed input.
                let value = read_bib_value(&mut chars);
                if !field_name.is_empty() {
                    fields.insert(field_name, value);
                }

                skip_ws(&mut chars);
                if chars.peek() == Some(&',') {
                    chars.next();
                }
            }

            // Missing author/title/year fall back to empty values.
            let authors = fields
                .get("author")
                .map(|a| parse_bib_authors(a))
                .unwrap_or_default();
            let title = fields.get("title").cloned().unwrap_or_default();
            let year = fields.get("year").cloned().unwrap_or_default();

            entries.insert(
                key.clone(),
                BibEntry {
                    key,
                    entry_type: entry_type_lower,
                    authors,
                    title,
                    year,
                    journal: fields.get("journal").cloned(),
                    publisher: fields.get("publisher").cloned(),
                    volume: fields.get("volume").cloned(),
                    pages: fields.get("pages").cloned(),
                    url: fields.get("url").cloned(),
                    doi: fields.get("doi").cloned(),
                },
            );
        } else {
            // Anything else between entries is ignored.
            chars.next();
        }
    }
    entries
}
401
/// Advances `chars` past any run of ASCII whitespace.
fn skip_ws(chars: &mut std::iter::Peekable<std::str::Chars>) {
    // `next_if` consumes a character only while the predicate holds.
    while chars.next_if(|c| c.is_ascii_whitespace()).is_some() {}
}
407
/// Skips optional whitespace and then one delimited block, if `chars` is
/// positioned at one; otherwise only the whitespace is consumed.
///
/// The block may be delimited by `{ }` or `( )`:
///
/// * In a brace-delimited block only braces nest, so parentheses in the
///   content (even unbalanced ones) do not end the block early.
/// * In a paren-delimited block parentheses nest, but only outside braces, so
///   a `)` inside `{...}` does not close the block.
///
/// An unterminated block consumes the rest of the input.
fn skip_braced_block(chars: &mut std::iter::Peekable<std::str::Chars>) {
    while chars.next_if(|c| c.is_ascii_whitespace()).is_some() {}

    let paren_delimited = match chars.next_if(|&c| c == '{' || c == '(') {
        Some('(') => true,
        Some(_) => false,
        None => return,
    };

    // The opening delimiter counts as the first level of its own kind.
    let mut braces: usize = if paren_delimited { 0 } else { 1 };
    let mut parens: usize = if paren_delimited { 1 } else { 0 };

    for ch in chars.by_ref() {
        match ch {
            '{' => braces += 1,
            '}' => {
                // A stray `}` in a paren block must not underflow.
                braces = braces.saturating_sub(1);
                if !paren_delimited && braces == 0 {
                    return;
                }
            }
            '(' if paren_delimited && braces == 0 => parens += 1,
            ')' if paren_delimited && braces == 0 => {
                parens -= 1;
                if parens == 0 {
                    return;
                }
            }
            _ => {}
        }
    }
}
415
/// Consumes the rest of a block whose opening delimiter has already been
/// read, stopping just after the delimiter that closes it.
///
/// `{`/`(` both open a level and `}`/`)` both close one. If the block is
/// never closed, the rest of the input is consumed.
fn skip_braced_block_remaining(chars: &mut std::iter::Peekable<std::str::Chars>) {
    // Starts at 1 for the opener consumed by the caller.
    let mut nesting = 1;
    // `find` consumes characters up to and including the first one for which
    // the closure returns true, i.e. the closing delimiter of the block.
    let _ = chars.find(|&c| {
        if matches!(c, '{' | '(') {
            nesting += 1;
        } else if matches!(c, '}' | ')') {
            nesting -= 1;
        }
        nesting == 0
    });
}
431
/// Reads one field value, after skipping leading ASCII whitespace.
///
/// Three forms are recognised:
///
/// * `{...}`: returns the content up to the matching `}`; nested braces are
///   kept in the value.
/// * `"..."`: returns the content up to the closing quote. A `"` inside
///   braces (as in `"M{\"u}ller"`) is part of the value and does not end it.
/// * bare token: returns characters up to the next `,`, `}`, `)` or ASCII
///   whitespace, none of which is consumed.
///
/// For the delimited forms the closing delimiter is consumed but not
/// returned. An unterminated value runs to the end of input.
fn read_bib_value(chars: &mut std::iter::Peekable<std::str::Chars>) -> String {
    while chars.next_if(|c| c.is_ascii_whitespace()).is_some() {}

    let mut val = String::new();
    match chars.peek() {
        Some('{') => {
            chars.next();
            let mut depth = 1usize;
            for ch in chars.by_ref() {
                match ch {
                    '{' => depth += 1,
                    '}' => {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                    _ => {}
                }
                val.push(ch);
            }
        }
        Some('"') => {
            chars.next();
            // Brace depth inside the quoted string: only a quote at depth 0
            // terminates the value.
            let mut depth = 0usize;
            for ch in chars.by_ref() {
                match ch {
                    '"' if depth == 0 => break,
                    '{' => depth += 1,
                    // Tolerate a stray `}` instead of underflowing.
                    '}' => depth = depth.saturating_sub(1),
                    _ => {}
                }
                val.push(ch);
            }
        }
        _ => {
            while let Some(ch) = chars
                .next_if(|&c| !(c == ',' || c == '}' || c == ')' || c.is_ascii_whitespace()))
            {
                val.push(ch);
            }
        }
    }
    val
}
482
/// Splits a BibTeX author list into individual names.
///
/// Names are separated by the word `and` standing alone between whitespace
/// (spaces, tabs or line breaks, so author lists wrapped across lines work)
/// and outside braces, so `{Barnes and Noble}` stays one name. Runs of
/// whitespace inside a name are collapsed to single spaces; empty names are
/// dropped.
fn parse_bib_authors(raw: &str) -> Vec<String> {
    let mut authors = Vec::new();
    let mut current = String::new();
    let mut depth = 0usize;

    for word in raw.split_whitespace() {
        if depth == 0 && word == "and" {
            if !current.is_empty() {
                authors.push(std::mem::take(&mut current));
            }
            continue;
        }
        // Track brace nesting so an `and` inside a group is not a separator.
        for ch in word.chars() {
            match ch {
                '{' => depth += 1,
                '}' => depth = depth.saturating_sub(1),
                _ => {}
            }
        }
        if !current.is_empty() {
            current.push(' ');
        }
        current.push_str(word);
    }
    if !current.is_empty() {
        authors.push(current);
    }
    authors
}
489
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a citation node from `(key id, optional locator)` pairs.
    fn make_citation(keys: &[(&str, Option<&str>)]) -> Node {
        Node::Citation(CitationNode {
            keys: keys
                .iter()
                .map(|(id, loc)| CitationKey {
                    id: id.to_string(),
                    prefix: None,
                    locator: loc.map(|s| s.to_string()),
                })
                .collect(),
            position: synthetic_pos(),
        })
    }

    /// Wraps `nodes` in a document root.
    fn make_root_with(nodes: Vec<Node>) -> Root {
        Root {
            node_type: RootType::Root,
            frontmatter: None,
            children: nodes,
            position: synthetic_pos(),
        }
    }

    /// Entry with the given core fields; every optional field is `None`.
    fn entry(key: &str, entry_type: &str, authors: &[&str], title: &str, year: &str) -> BibEntry {
        BibEntry {
            key: key.to_string(),
            entry_type: entry_type.to_string(),
            authors: authors.iter().map(|a| a.to_string()).collect(),
            title: title.to_string(),
            year: year.to_string(),
            journal: None,
            publisher: None,
            volume: None,
            pages: None,
            url: None,
            doi: None,
        }
    }

    /// Resolver holding exactly one entry, stored under the entry's own key.
    fn resolver_for(entry: BibEntry, style: CitationStyle) -> CitationResolve {
        let mut entries = HashMap::new();
        entries.insert(entry.key.clone(), entry);
        CitationResolve::new(entries, style)
    }

    /// Citation key without prefix or locator.
    fn plain_key(id: &str) -> CitationKey {
        CitationKey {
            id: id.to_string(),
            prefix: None,
            locator: None,
        }
    }

    /// Text node with a synthetic position.
    fn text_node(value: &str) -> Node {
        Node::Text(TextNode {
            value: value.to_string(),
            position: synthetic_pos(),
        })
    }

    /// Paragraph node with the given children.
    fn paragraph_of(children: Vec<Node>) -> Node {
        Node::Paragraph(StandardBlockNode {
            depth: None,
            ordered: None,
            checked: None,
            id: None,
            children,
            position: synthetic_pos(),
        })
    }

    #[test]
    fn parse_bib_basic() {
        let bib = r#"
@article{smith2024,
  author = {John Smith and Jane Doe},
  title = {A Great Paper},
  journal = {Nature},
  year = {2024},
  volume = {42},
  pages = {100--110},
}
        "#;
        let entries = parse_bib(bib);
        assert_eq!(entries.len(), 1);
        let e = &entries["smith2024"];
        assert_eq!(e.authors, vec!["John Smith", "Jane Doe"]);
        assert_eq!(e.title, "A Great Paper");
        assert_eq!(e.year, "2024");
        assert_eq!(e.journal.as_deref(), Some("Nature"));
        assert_eq!(e.volume.as_deref(), Some("42"));
    }

    #[test]
    fn parse_bib_skips_comments_and_preamble() {
        let bib = r#"
% This is a comment
@preamble{"\newcommand{\noopsort}[1]{}"}
@string{mit = {MIT Press}}
@book{knuth1984,
  author = {Donald Knuth},
  title = {The TeXbook},
  year = {1984},
  publisher = {Addison-Wesley},
}
        "#;
        let entries = parse_bib(bib);
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key("knuth1984"));
    }

    #[test]
    fn parse_bib_double_quoted_values() {
        let bib = r#"
@inproceedings{jones2023,
  author = "Alice Jones",
  title = "Machine Learning",
  year = "2023",
}
        "#;
        let entries = parse_bib(bib);
        assert_eq!(entries["jones2023"].authors, vec!["Alice Jones"]);
        assert_eq!(entries["jones2023"].title, "Machine Learning");
    }

    #[test]
    fn author_year_single_author() {
        let resolver = resolver_for(
            entry("smith2024", "article", &["John Smith"], "A Paper", "2024"),
            CitationStyle::AuthorYear,
        );
        let text = resolver.format_inline(&[plain_key("smith2024")], &["smith2024".to_string()]);
        assert_eq!(text, "(Smith, 2024)");
    }

    #[test]
    fn author_year_two_authors() {
        let resolver = resolver_for(
            entry("k1", "article", &["Alice Jones", "Bob Smith"], "", "2023"),
            CitationStyle::AuthorYear,
        );
        assert_eq!(
            resolver.format_inline(&[plain_key("k1")], &["k1".to_string()]),
            "(Jones & Smith, 2023)"
        );
    }

    #[test]
    fn author_year_three_plus_authors() {
        let resolver = resolver_for(
            entry("k1", "article", &["A", "B", "C"], "", "2020"),
            CitationStyle::AuthorYear,
        );
        assert_eq!(
            resolver.format_inline(&[plain_key("k1")], &["k1".to_string()]),
            "(A et al., 2020)"
        );
    }

    #[test]
    fn author_year_with_locator() {
        let resolver = resolver_for(
            entry("s", "book", &["Smith"], "", "2024"),
            CitationStyle::AuthorYear,
        );
        let keys = vec![CitationKey {
            locator: Some("p. 42".to_string()),
            ..plain_key("s")
        }];
        assert_eq!(
            resolver.format_inline(&keys, &["s".to_string()]),
            "(Smith, 2024, p. 42)"
        );
    }

    #[test]
    fn numeric_style() {
        // Numeric citations depend only on the key order, not on the entries.
        let resolver = CitationResolve::new(HashMap::new(), CitationStyle::Numeric);
        let keys = vec![plain_key("a"), plain_key("c")];
        let order = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        assert_eq!(resolver.format_inline(&keys, &order), "[1, 3]");
    }

    #[test]
    fn unknown_key_shows_question_mark() {
        let resolver = CitationResolve::new(HashMap::new(), CitationStyle::AuthorYear);
        assert_eq!(
            resolver.format_inline(&[plain_key("missing")], &["missing".to_string()]),
            "(missing?)"
        );
    }

    #[test]
    fn transform_replaces_citations_and_appends_bib() {
        let resolver = resolver_for(
            BibEntry {
                journal: Some("Nature".to_string()),
                ..entry("smith2024", "article", &["John Smith"], "Paper", "2024")
            },
            CitationStyle::AuthorYear,
        );

        let mut root = make_root_with(vec![paragraph_of(vec![
            text_node("See "),
            make_citation(&[("smith2024", None)]),
        ])]);

        resolver.transform(&mut root, "");

        // Fail loudly if the paragraph is gone, rather than silently skipping
        // the assertions below.
        let Node::Paragraph(p) = &root.children[0] else {
            panic!("Expected paragraph, got {:?}", root.children[0]);
        };
        assert_eq!(p.children.len(), 2);
        if let Node::Text(t) = &p.children[1] {
            assert_eq!(t.value, "(Smith, 2024)");
        } else {
            panic!("Expected text node, got {:?}", p.children[1]);
        }

        let last = root.children.last().unwrap();
        assert!(
            matches!(last, Node::Component(c) if c.name == "Bibliography"),
            "Expected Bibliography component at end, got {:?}",
            last
        );
    }

    #[test]
    fn transform_replaces_placeholder() {
        let resolver = CitationResolve::new(HashMap::new(), CitationStyle::Numeric);

        let mut root = make_root_with(vec![
            paragraph_of(vec![make_citation(&[("k1", None)])]),
            // Empty placeholder marking where the bibliography should go.
            Node::Component(ComponentNode {
                name: "Bibliography".to_string(),
                is_inline: false,
                attributes: vec![],
                children: vec![],
                raw_content: String::new(),
                position: synthetic_pos(),
            }),
            paragraph_of(vec![text_node("After bib")]),
        ]);

        resolver.transform(&mut root, "");

        // Replaced in place: nothing appended, surrounding nodes untouched.
        assert_eq!(root.children.len(), 3);
        assert!(
            matches!(&root.children[1], Node::Component(c) if c.name == "Bibliography" && !c.children.is_empty()),
        );
        assert!(matches!(&root.children[2], Node::Paragraph(_)));
    }

    #[test]
    fn no_citations_no_bibliography() {
        let resolver = CitationResolve::new(HashMap::new(), CitationStyle::AuthorYear);
        let mut root = make_root_with(vec![text_node("No citations here")]);
        resolver.transform(&mut root, "");
        assert_eq!(root.children.len(), 1);
    }

    #[test]
    fn surname_extraction() {
        assert_eq!(surname("John Smith"), "Smith");
        assert_eq!(surname("Smith, John"), "Smith");
        assert_eq!(surname("Madonna"), "Madonna");
        assert_eq!(surname("van der Berg, Jan"), "van der Berg");
    }

    #[test]
    fn bib_entry_formatting() {
        let entry = BibEntry {
            journal: Some("Nature".to_string()),
            volume: Some("42".to_string()),
            pages: Some("100--110".to_string()),
            doi: Some("10.1234/test".to_string()),
            ..entry(
                "smith2024",
                "article",
                &["John Smith", "Jane Doe"],
                "A Great Paper",
                "2024",
            )
        };
        let resolver = CitationResolve::new(HashMap::new(), CitationStyle::Numeric);
        let formatted = resolver.format_bib_entry(&entry, Some(1));
        assert!(formatted.contains("[1]"));
        assert!(formatted.contains("John Smith, Jane Doe"));
        assert!(formatted.contains("\"A Great Paper\""));
        assert!(formatted.contains("*Nature*"));
        assert!(formatted.contains("42"));
        assert!(formatted.contains("100--110"));
        assert!(formatted.contains("doi:10.1234/test"));
    }
}