panache_parser/parser/inlines/
citations.rs1use super::sink::InlineSink;
10use crate::syntax::SyntaxKind;
11
12pub(crate) fn try_parse_bracketed_citation(text: &str) -> Option<(usize, &str)> {
17 let bytes = text.as_bytes();
18
19 if bytes.is_empty() || bytes[0] != b'[' {
21 return None;
22 }
23
24 let mut has_citation = false;
27 let mut pos = 1;
28 let mut bracket_depth = 0;
29
30 while pos < bytes.len() {
31 match bytes[pos] {
32 b'\\' => {
33 pos += 2;
35 continue;
36 }
37 b'`' => {
38 match code_span_end(bytes, pos) {
40 Some(end) => pos = end,
41 None => pos += 1,
42 }
43 }
44 b'[' => {
45 bracket_depth += 1;
46 pos += 1;
47 }
48 b']' => {
49 if bracket_depth == 0 {
50 break;
52 }
53 bracket_depth -= 1;
54 pos += 1;
55 }
56 b'@' => {
57 has_citation = true;
59 break;
60 }
61 _ => {
62 pos += 1;
63 }
64 }
65 }
66
67 if !has_citation {
68 return None;
69 }
70
71 pos = 1;
73 bracket_depth = 1;
74
75 while pos < bytes.len() {
76 match bytes[pos] {
77 b'\\' => {
78 pos += 2;
80 continue;
81 }
82 b'`' => {
83 match code_span_end(bytes, pos) {
85 Some(end) => pos = end,
86 None => pos += 1,
87 }
88 }
89 b'[' => {
90 bracket_depth += 1;
91 pos += 1;
92 }
93 b']' => {
94 bracket_depth -= 1;
95 if bracket_depth == 0 {
96 let content = &text[1..pos];
98 return Some((pos + 1, content));
99 }
100 pos += 1;
101 }
102 _ => {
103 pos += 1;
104 }
105 }
106 }
107
108 None
110}
111
112pub(crate) fn try_parse_bare_citation(text: &str) -> Option<(usize, &str, bool)> {
117 let bytes = text.as_bytes();
118
119 if bytes.is_empty() {
120 return None;
121 }
122
123 let mut pos = 0;
124 let has_suppress = bytes[pos] == b'-';
125
126 if has_suppress {
127 pos += 1;
128 if pos >= bytes.len() {
129 return None;
130 }
131 }
132
133 if bytes[pos] != b'@' {
135 return None;
136 }
137 pos += 1;
138
139 if pos >= bytes.len() {
140 return None;
141 }
142
143 let key_start = pos;
145 let key_len = parse_citation_key(&text[pos..])?;
146
147 if key_len == 0 {
148 return None;
149 }
150
151 let total_len = pos + key_len;
152 let key = &text[key_start..total_len];
153
154 Some((total_len, key, has_suppress))
155}
156
/// Returns `true` when `key` has the shape `<prefix>-<rest>` where `<prefix>`
/// is one of the built-in cross-reference prefixes listed below (compared
/// ASCII case-insensitively) and `<rest>` is non-empty.
///
/// Only the first `-` separates prefix from rest, so `sec-intro-more` is
/// accepted while `fig` and `fig-` are not.
pub fn is_quarto_crossref_key(key: &str) -> bool {
    const BUILTIN_PREFIXES: &[&str] = &[
        "fig", "tbl", "lst", "tip", "nte", "wrn", "imp", "cau", "thm", "lem", "cor", "prp",
        "cnj", "def", "exm", "exr", "sol", "rem", "alg", "eq", "sec",
    ];

    match key.split_once('-') {
        // Compare in place instead of allocating a lowercased copy of the key.
        Some((prefix, rest)) if !rest.is_empty() => BUILTIN_PREFIXES
            .iter()
            .any(|candidate| prefix.eq_ignore_ascii_case(candidate)),
        _ => false,
    }
}
191
192pub fn is_crossref_key(key: &str, custom_prefixes: &[String]) -> bool {
198 is_quarto_crossref_key(key) || has_custom_crossref_prefix(key, custom_prefixes)
199}
200
/// Returns `true` when `key` has the shape `<prefix>-<rest>` with a non-empty
/// `<rest>` and `<prefix>` equals one of `custom_prefixes`, compared ASCII
/// case-insensitively.
///
/// Only the first `-` in `key` separates prefix from rest. An empty
/// `custom_prefixes` slice never matches.
pub fn has_custom_crossref_prefix(key: &str, custom_prefixes: &[String]) -> bool {
    match key.split_once('-') {
        // `eq_ignore_ascii_case` already folds case, so no lowercased copy of
        // the key is needed.
        Some((prefix, rest)) if !rest.is_empty() => custom_prefixes
            .iter()
            .any(|candidate| candidate.eq_ignore_ascii_case(prefix)),
        _ => false,
    }
}
221
/// Label prefixes recognised by `is_bookdown_label` and, in the
/// `<prefix>:<rest>` form, by `has_bookdown_prefix`.
pub const BOOKDOWN_LABEL_PREFIXES: &[&str] = &[
    "eq", "fig", "tab", "thm",
    "lem", "cor", "prp", "cnj",
    "def", "exm", "exr", "sol",
    "rem", "alg", "sec", "hyp",
];
226
227pub fn is_bookdown_label(label: &str) -> bool {
228 BOOKDOWN_LABEL_PREFIXES.contains(&label)
229}
230
231pub fn has_bookdown_prefix(label: &str) -> bool {
232 let mut parts = label.splitn(2, ':');
233 let prefix = parts.next().unwrap_or("");
234 let rest = parts.next().unwrap_or("");
235 if rest.is_empty() {
236 return false;
237 }
238 is_bookdown_label(prefix)
239}
240
241pub(crate) fn emit_crossref(builder: &mut impl InlineSink, key: &str, has_suppress: bool) {
242 builder.start_node(SyntaxKind::CROSSREF.into());
243
244 if has_suppress {
245 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "-@");
246 } else {
247 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "@");
248 }
249
250 if key.starts_with('{') && key.ends_with('}') {
251 builder.token(SyntaxKind::CROSSREF_BRACE_OPEN.into(), "{");
252 builder.token(SyntaxKind::CROSSREF_KEY.into(), &key[1..key.len() - 1]);
253 builder.token(SyntaxKind::CROSSREF_BRACE_CLOSE.into(), "}");
254 } else {
255 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
256 }
257
258 builder.finish_node();
259}
260
261pub(crate) fn emit_bookdown_crossref(builder: &mut impl InlineSink, key: &str) {
262 builder.start_node(SyntaxKind::CROSSREF.into());
263 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_OPEN.into(), "\\@ref(");
264 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
265 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_CLOSE.into(), ")");
266 builder.finish_node();
267}
268
269fn parse_citation_key(text: &str) -> Option<usize> {
279 if text.is_empty() {
280 return None;
281 }
282
283 if text.starts_with('{') {
285 let mut escape_next = false;
287
288 for (idx, ch) in text.char_indices().skip(1) {
289 if escape_next {
290 escape_next = false;
291 continue;
292 }
293
294 match ch {
295 '\\' => escape_next = true,
296 '}' => return Some(idx + ch.len_utf8()),
297 _ => {}
298 }
299 }
300
301 return None;
303 }
304
305 let mut iter = text.char_indices();
307 let (_, first_char) = iter.next()?;
308 if !first_char.is_alphanumeric() && first_char != '_' {
309 return None;
310 }
311
312 let mut last_alnum_end = first_char.len_utf8();
313 let mut last_included_end = last_alnum_end;
314 let mut last_punct_start: Option<usize> = None;
315 let mut prev_was_punct = false;
316
317 for (idx, ch) in iter {
318 if ch.is_alphanumeric() || ch == '_' {
319 prev_was_punct = false;
320 last_alnum_end = idx + ch.len_utf8();
321 last_included_end = last_alnum_end;
322 last_punct_start = None;
323 } else if is_internal_punctuation(ch) {
324 if prev_was_punct {
326 return Some(last_punct_start.unwrap_or(last_alnum_end));
328 }
329 prev_was_punct = true;
330 last_punct_start = Some(idx);
331 last_included_end = idx + ch.len_utf8();
332 } else {
333 break;
335 }
336 }
337
338 if prev_was_punct {
339 return Some(last_alnum_end);
340 }
341
342 if last_included_end == 0 {
343 None
344 } else {
345 Some(last_included_end)
346 }
347}
348
/// Finds the end of the backtick code span that opens at `pos`.
///
/// The opening delimiter is the run of consecutive backticks starting at
/// `pos`; the span is closed by the next run of backticks of exactly the same
/// length. Returns the byte offset just past the closing run, or `None` when
/// no such run exists (including when `bytes[pos]` is not a backtick).
fn code_span_end(bytes: &[u8], pos: usize) -> Option<usize> {
    // Length of the backtick run starting at `from` (0 if there is none).
    let tick_run = |from: usize| {
        bytes
            .get(from..)
            .map_or(0, |tail| tail.iter().take_while(|&&b| b == b'`').count())
    };

    let opener_len = tick_run(pos);
    let mut cursor = pos + opener_len;

    while cursor < bytes.len() {
        let closer_len = tick_run(cursor);
        if closer_len == 0 {
            cursor += 1;
            continue;
        }
        // Always step over the whole run so a longer run is never matched
        // partially.
        cursor += closer_len;
        if closer_len == opener_len {
            return Some(cursor);
        }
    }

    None
}
380
/// Returns `true` for the punctuation characters that `parse_citation_key`
/// allows inside (but not at the start or end of) a plain citation key:
/// `: . # $ % & - + ? < > ~ /`.
fn is_internal_punctuation(ch: char) -> bool {
    const INTERNAL_PUNCTUATION: &str = ":.#$%&-+?<>~/";
    INTERNAL_PUNCTUATION.contains(ch)
}
388
389pub(crate) fn emit_bracketed_citation(builder: &mut impl InlineSink, content: &str) {
391 builder.start_node(SyntaxKind::CITATION.into());
392
393 builder.token(SyntaxKind::LINK_START.into(), "[");
395
396 emit_bracketed_citation_content(builder, content);
398
399 builder.token(SyntaxKind::LINK_DEST.into(), "]");
401
402 builder.finish_node();
403}
404
405fn emit_bracketed_citation_content(builder: &mut impl InlineSink, content: &str) {
406 let mut text_start = 0;
407 let mut iter = content.char_indices().peekable();
408
409 while let Some((idx, ch)) = iter.next() {
410 if ch == '\\' {
414 iter.next();
415 continue;
416 }
417
418 if ch == '`'
419 && let Some(end) = code_span_end(content.as_bytes(), idx)
420 {
421 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < end) {
424 iter.next();
425 }
426 continue;
427 }
428
429 if ch == '@' || (ch == '-' && matches!(iter.peek(), Some((_, '@')))) {
430 if idx > text_start {
431 builder.token(
432 SyntaxKind::CITATION_CONTENT.into(),
433 &content[text_start..idx],
434 );
435 }
436
437 let mut marker_len = 1;
438 let marker_text = if ch == '-' {
439 iter.next();
440 marker_len = 2;
441 "-@"
442 } else {
443 "@"
444 };
445 builder.token(SyntaxKind::CITATION_MARKER.into(), marker_text);
446
447 let key_start = idx + marker_len;
448 if key_start >= content.len() {
449 text_start = key_start;
450 continue;
451 }
452
453 if let Some(key_len) = parse_citation_key(&content[key_start..]) {
454 let key_end = key_start + key_len;
455 let key = &content[key_start..key_end];
456 if key.starts_with('{') && key.ends_with('}') {
457 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
458 if key.len() > 2 {
459 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
460 }
461 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
462 } else {
463 builder.token(SyntaxKind::CITATION_KEY.into(), key);
464 }
465 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < key_end) {
466 iter.next();
467 }
468 text_start = key_end;
469 continue;
470 }
471
472 text_start = key_start;
473 continue;
474 }
475
476 if ch == ';' {
477 if idx > text_start {
478 builder.token(
479 SyntaxKind::CITATION_CONTENT.into(),
480 &content[text_start..idx],
481 );
482 }
483 builder.token(SyntaxKind::CITATION_SEPARATOR.into(), ";");
484 text_start = idx + ch.len_utf8();
485 continue;
486 }
487 }
488
489 if text_start < content.len() {
490 builder.token(SyntaxKind::CITATION_CONTENT.into(), &content[text_start..]);
491 }
492}
493
494pub(crate) fn emit_bare_citation(builder: &mut impl InlineSink, key: &str, has_suppress: bool) {
496 builder.start_node(SyntaxKind::CITATION.into());
497
498 if has_suppress {
500 builder.token(SyntaxKind::CITATION_MARKER.into(), "-@");
501 } else {
502 builder.token(SyntaxKind::CITATION_MARKER.into(), "@");
503 }
504
505 if key.starts_with('{') && key.ends_with('}') {
507 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
508 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
509 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
510 } else {
511 builder.token(SyntaxKind::CITATION_KEY.into(), key);
512 }
513
514 builder.finish_node();
515}
516
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_citation_key ------------------------------------------------

    #[test]
    fn test_parse_simple_citation_key() {
        for (input, expected) in [("doe99", 5), ("smith2000", 9)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_with_internal_punct() {
        for (input, expected) in [("Foo_bar.baz", 11), ("author:2020", 11)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_trailing_punct() {
        // Trailing punctuation is not part of the key.
        for (input, expected) in [("Foo_bar.baz.", 11), ("key:value:", 9)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_double_punct() {
        // The key stops before the first of two consecutive punctuation characters.
        assert_eq!(parse_citation_key("Foo_bar--baz"), Some(7));
    }

    #[test]
    fn test_parse_citation_key_with_braces() {
        for (input, expected) in [("{https://example.com}", 21), ("{Foo_bar.baz.}", 14)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_invalid_start() {
        for input in [".invalid", ":invalid"] {
            assert_eq!(parse_citation_key(input), None);
        }
    }

    #[test]
    fn test_parse_citation_key_stops_at_space() {
        assert_eq!(parse_citation_key("key rest"), Some(3));
    }

    // ---- cross-reference key classification --------------------------------

    #[test]
    fn is_crossref_key_accepts_builtin_without_custom() {
        let no_custom: &[String] = &[];
        assert!(is_crossref_key("fig-plot", no_custom));
        assert!(!is_crossref_key("algo-cd", no_custom));
    }

    #[test]
    fn is_crossref_key_accepts_custom_prefix() {
        let custom = vec![String::from("algo")];
        // Custom prefix, in either case.
        assert!(is_crossref_key("algo-cd", &custom));
        assert!(is_crossref_key("ALGO-cd", &custom));
        // Built-in prefixes keep working alongside custom ones.
        assert!(is_crossref_key("tbl-x", &custom));
        // A prefix with nothing after it is not a key.
        assert!(!is_crossref_key("algo", &custom));
        // Ordinary citation keys are not cross-references.
        assert!(!is_crossref_key("doe99", &custom));
    }

    // ---- try_parse_bare_citation -------------------------------------------

    #[test]
    fn test_parse_bare_citation_simple() {
        assert_eq!(
            try_parse_bare_citation("@doe99"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_suppress() {
        assert_eq!(
            try_parse_bare_citation("-@smith04"),
            Some((9, "smith04", true))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_trailing_text() {
        assert_eq!(
            try_parse_bare_citation("@doe99 says"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_braced_key() {
        assert_eq!(
            try_parse_bare_citation("@{https://example.com}"),
            Some((22, "{https://example.com}", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_not_citation() {
        for input in ["not a citation", "@"] {
            assert_eq!(try_parse_bare_citation(input), None);
        }
    }

    // ---- try_parse_bracketed_citation --------------------------------------

    #[test]
    fn test_parse_bracketed_citation_simple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99]"),
            Some((8, "@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_multiple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99; @smith2000]"),
            Some((20, "@doe99; @smith2000"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_prefix() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99]"),
            Some((12, "see @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_locator() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99, pp. 33-35]"),
            Some((19, "@doe99, pp. 33-35"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_complex() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99, pp. 33-35 and *passim*]"),
            Some((36, "see @doe99, pp. 33-35 and *passim*"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_suppress() {
        assert_eq!(
            try_parse_bracketed_citation("[-@doe99]"),
            Some((9, "-@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_not_citation() {
        for input in ["[text](url)", "[just text]"] {
            assert_eq!(try_parse_bracketed_citation(input), None);
        }
    }

    #[test]
    fn test_parse_bracketed_citation_nested_brackets() {
        assert_eq!(
            try_parse_bracketed_citation("[see [nested] @doe99]"),
            Some((21, "see [nested] @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_bracket() {
        assert_eq!(
            try_parse_bracketed_citation(r"[@doe99 with \] escaped]"),
            Some((24, r"@doe99 with \] escaped"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_paren_in_prefix() {
        assert_eq!(
            try_parse_bracketed_citation("[see (Smith 1999) and @doe99]"),
            Some((29, "see (Smith 1999) and @doe99"))
        );
    }

    #[test]
    fn test_bracketed_citation_ignores_at_in_code_span() {
        // The only `@` sits inside a code span, so this is not a citation.
        let parsed = try_parse_bracketed_citation("[`@foo`]");
        assert_eq!(parsed, None);
    }

    #[test]
    fn test_bracketed_citation_code_span_in_prefix() {
        let parsed = try_parse_bracketed_citation("[`x@y` @doe99]");
        assert_eq!(parsed, Some((14, "`x@y` @doe99")));
    }

    #[test]
    fn test_bracketed_citation_bracket_in_code_span() {
        // A `]` inside a code span does not close the citation.
        let parsed = try_parse_bracketed_citation("[`a]b` @doe99]");
        assert_eq!(parsed, Some((14, "`a]b` @doe99")));
    }

    #[test]
    fn test_bracketed_citation_unterminated_backtick() {
        // A backtick with no closing run is plain text, so the `@` still counts.
        let parsed = try_parse_bracketed_citation("[`@foo bar]");
        assert_eq!(parsed, Some((11, "`@foo bar")));
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_at_in_prefix() {
        assert_eq!(
            try_parse_bracketed_citation(r"[see also \@ref(svm) and @bischl_applied_2024]"),
            Some((46, r"see also \@ref(svm) and @bischl_applied_2024"))
        );
    }
}