panache_parser/parser/inlines/
citations.rs1use super::sink::InlineSink;
10use crate::syntax::SyntaxKind;
11
12pub(crate) fn try_parse_bracketed_citation(text: &str) -> Option<(usize, &str)> {
17 let bytes = text.as_bytes();
18
19 if bytes.is_empty() || bytes[0] != b'[' {
21 return None;
22 }
23
24 let mut has_citation = false;
27 let mut pos = 1;
28 let mut bracket_depth = 0;
29
30 while pos < bytes.len() {
31 match bytes[pos] {
32 b'\\' => {
33 pos += 2;
35 continue;
36 }
37 b'`' => {
38 match code_span_end(bytes, pos) {
40 Some(end) => pos = end,
41 None => pos += 1,
42 }
43 }
44 b'[' => {
45 bracket_depth += 1;
46 pos += 1;
47 }
48 b']' => {
49 if bracket_depth == 0 {
50 break;
52 }
53 bracket_depth -= 1;
54 pos += 1;
55 }
56 b'@' => {
57 has_citation = true;
59 break;
60 }
61 _ => {
62 pos += 1;
63 }
64 }
65 }
66
67 if !has_citation {
68 return None;
69 }
70
71 pos = 1;
73 bracket_depth = 1;
74
75 while pos < bytes.len() {
76 match bytes[pos] {
77 b'\\' => {
78 pos += 2;
80 continue;
81 }
82 b'`' => {
83 match code_span_end(bytes, pos) {
85 Some(end) => pos = end,
86 None => pos += 1,
87 }
88 }
89 b'[' => {
90 bracket_depth += 1;
91 pos += 1;
92 }
93 b']' => {
94 bracket_depth -= 1;
95 if bracket_depth == 0 {
96 let content = &text[1..pos];
98 return Some((pos + 1, content));
99 }
100 pos += 1;
101 }
102 _ => {
103 pos += 1;
104 }
105 }
106 }
107
108 None
110}
111
112pub(crate) fn try_parse_bare_citation(text: &str) -> Option<(usize, &str, bool)> {
117 let bytes = text.as_bytes();
118
119 if bytes.is_empty() {
120 return None;
121 }
122
123 let mut pos = 0;
124 let has_suppress = bytes[pos] == b'-';
125
126 if has_suppress {
127 pos += 1;
128 if pos >= bytes.len() {
129 return None;
130 }
131 }
132
133 if bytes[pos] != b'@' {
135 return None;
136 }
137 pos += 1;
138
139 if pos >= bytes.len() {
140 return None;
141 }
142
143 let key_start = pos;
145 let key_len = parse_citation_key(&text[pos..])?;
146
147 if key_len == 0 {
148 return None;
149 }
150
151 let total_len = pos + key_len;
152 let key = &text[key_start..total_len];
153
154 Some((total_len, key, has_suppress))
155}
156
/// Reports whether `key` looks like a Quarto cross-reference key.
///
/// A key qualifies when it has the shape `<prefix>-<rest>`, split at the first
/// `-`, where `rest` is non-empty and `prefix` matches one of the recognised
/// prefixes, compared ASCII case-insensitively.
///
/// The comparison is done in place; no lowercased copy of the key is allocated.
pub fn is_quarto_crossref_key(key: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fig", "tbl", "lst", "tip", "nte", "wrn", "imp", "cau", "thm", "lem", "cor", "prp", "cnj",
        "def", "exm", "exr", "sol", "rem", "alg", "eq", "sec",
    ];

    match key.split_once('-') {
        Some((prefix, rest)) if !rest.is_empty() => PREFIXES
            .iter()
            .any(|&known| prefix.eq_ignore_ascii_case(known)),
        // No `-` at all, or nothing after it.
        _ => false,
    }
}
191
/// Label prefixes recognised by [`is_bookdown_label`] and, through it, by
/// [`has_bookdown_prefix`] (the part of a label before the `:`).
pub const BOOKDOWN_LABEL_PREFIXES: &[&str] = &[
    "eq", "fig", "tab", "thm",
    "lem", "cor", "prp", "cnj",
    "def", "exm", "exr", "sol",
    "rem", "alg", "sec", "hyp",
];
196
197pub fn is_bookdown_label(label: &str) -> bool {
198 BOOKDOWN_LABEL_PREFIXES.contains(&label)
199}
200
201pub fn has_bookdown_prefix(label: &str) -> bool {
202 let mut parts = label.splitn(2, ':');
203 let prefix = parts.next().unwrap_or("");
204 let rest = parts.next().unwrap_or("");
205 if rest.is_empty() {
206 return false;
207 }
208 is_bookdown_label(prefix)
209}
210
211pub(crate) fn emit_crossref(builder: &mut impl InlineSink, key: &str, has_suppress: bool) {
212 builder.start_node(SyntaxKind::CROSSREF.into());
213
214 if has_suppress {
215 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "-@");
216 } else {
217 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "@");
218 }
219
220 if key.starts_with('{') && key.ends_with('}') {
221 builder.token(SyntaxKind::CROSSREF_BRACE_OPEN.into(), "{");
222 builder.token(SyntaxKind::CROSSREF_KEY.into(), &key[1..key.len() - 1]);
223 builder.token(SyntaxKind::CROSSREF_BRACE_CLOSE.into(), "}");
224 } else {
225 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
226 }
227
228 builder.finish_node();
229}
230
231pub(crate) fn emit_bookdown_crossref(builder: &mut impl InlineSink, key: &str) {
232 builder.start_node(SyntaxKind::CROSSREF.into());
233 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_OPEN.into(), "\\@ref(");
234 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
235 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_CLOSE.into(), ")");
236 builder.finish_node();
237}
238
239fn parse_citation_key(text: &str) -> Option<usize> {
249 if text.is_empty() {
250 return None;
251 }
252
253 if text.starts_with('{') {
255 let mut escape_next = false;
257
258 for (idx, ch) in text.char_indices().skip(1) {
259 if escape_next {
260 escape_next = false;
261 continue;
262 }
263
264 match ch {
265 '\\' => escape_next = true,
266 '}' => return Some(idx + ch.len_utf8()),
267 _ => {}
268 }
269 }
270
271 return None;
273 }
274
275 let mut iter = text.char_indices();
277 let (_, first_char) = iter.next()?;
278 if !first_char.is_alphanumeric() && first_char != '_' {
279 return None;
280 }
281
282 let mut last_alnum_end = first_char.len_utf8();
283 let mut last_included_end = last_alnum_end;
284 let mut last_punct_start: Option<usize> = None;
285 let mut prev_was_punct = false;
286
287 for (idx, ch) in iter {
288 if ch.is_alphanumeric() || ch == '_' {
289 prev_was_punct = false;
290 last_alnum_end = idx + ch.len_utf8();
291 last_included_end = last_alnum_end;
292 last_punct_start = None;
293 } else if is_internal_punctuation(ch) {
294 if prev_was_punct {
296 return Some(last_punct_start.unwrap_or(last_alnum_end));
298 }
299 prev_was_punct = true;
300 last_punct_start = Some(idx);
301 last_included_end = idx + ch.len_utf8();
302 } else {
303 break;
305 }
306 }
307
308 if prev_was_punct {
309 return Some(last_alnum_end);
310 }
311
312 if last_included_end == 0 {
313 None
314 } else {
315 Some(last_included_end)
316 }
317}
318
/// Finds the end of the backtick code span that opens at `pos`.
///
/// The opener is the run of consecutive backticks starting at `pos`. The span is
/// closed by the first later backtick run of exactly the same length; runs of any
/// other length are skipped. Returns the index just past the closing run, or `None`
/// when there is no matching closer (including when `pos` is not on a backtick).
fn code_span_end(bytes: &[u8], pos: usize) -> Option<usize> {
    // Length of the backtick run starting at `from` (0 if out of range or not a backtick).
    let run_len = |from: usize| -> usize {
        bytes.get(from..).map_or(0, |tail| {
            tail.iter().take_while(|&&byte| byte == b'`').count()
        })
    };

    let opener = run_len(pos);
    let mut cursor = pos + opener;

    while cursor < bytes.len() {
        let ticks = run_len(cursor);
        if ticks == 0 {
            cursor += 1;
            continue;
        }
        // Consume the whole run before comparing, so a longer run is never
        // mistaken for a closer by matching only part of it.
        cursor += ticks;
        if ticks == opener {
            return Some(cursor);
        }
    }

    None
}
350
/// Reports whether `ch` is one of the punctuation characters allowed *inside* a
/// plain citation key: `: . # $ % & - + ? < > ~ /`.
fn is_internal_punctuation(ch: char) -> bool {
    const INTERNAL_PUNCTUATION: &str = ":.#$%&-+?<>~/";
    INTERNAL_PUNCTUATION.contains(ch)
}
358
359pub(crate) fn emit_bracketed_citation(builder: &mut impl InlineSink, content: &str) {
361 builder.start_node(SyntaxKind::CITATION.into());
362
363 builder.token(SyntaxKind::LINK_START.into(), "[");
365
366 emit_bracketed_citation_content(builder, content);
368
369 builder.token(SyntaxKind::LINK_DEST.into(), "]");
371
372 builder.finish_node();
373}
374
375fn emit_bracketed_citation_content(builder: &mut impl InlineSink, content: &str) {
376 let mut text_start = 0;
377 let mut iter = content.char_indices().peekable();
378
379 while let Some((idx, ch)) = iter.next() {
380 if ch == '\\' {
384 iter.next();
385 continue;
386 }
387
388 if ch == '`'
389 && let Some(end) = code_span_end(content.as_bytes(), idx)
390 {
391 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < end) {
394 iter.next();
395 }
396 continue;
397 }
398
399 if ch == '@' || (ch == '-' && matches!(iter.peek(), Some((_, '@')))) {
400 if idx > text_start {
401 builder.token(
402 SyntaxKind::CITATION_CONTENT.into(),
403 &content[text_start..idx],
404 );
405 }
406
407 let mut marker_len = 1;
408 let marker_text = if ch == '-' {
409 iter.next();
410 marker_len = 2;
411 "-@"
412 } else {
413 "@"
414 };
415 builder.token(SyntaxKind::CITATION_MARKER.into(), marker_text);
416
417 let key_start = idx + marker_len;
418 if key_start >= content.len() {
419 text_start = key_start;
420 continue;
421 }
422
423 if let Some(key_len) = parse_citation_key(&content[key_start..]) {
424 let key_end = key_start + key_len;
425 let key = &content[key_start..key_end];
426 if key.starts_with('{') && key.ends_with('}') {
427 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
428 if key.len() > 2 {
429 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
430 }
431 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
432 } else {
433 builder.token(SyntaxKind::CITATION_KEY.into(), key);
434 }
435 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < key_end) {
436 iter.next();
437 }
438 text_start = key_end;
439 continue;
440 }
441
442 text_start = key_start;
443 continue;
444 }
445
446 if ch == ';' {
447 if idx > text_start {
448 builder.token(
449 SyntaxKind::CITATION_CONTENT.into(),
450 &content[text_start..idx],
451 );
452 }
453 builder.token(SyntaxKind::CITATION_SEPARATOR.into(), ";");
454 text_start = idx + ch.len_utf8();
455 continue;
456 }
457 }
458
459 if text_start < content.len() {
460 builder.token(SyntaxKind::CITATION_CONTENT.into(), &content[text_start..]);
461 }
462}
463
464pub(crate) fn emit_bare_citation(builder: &mut impl InlineSink, key: &str, has_suppress: bool) {
466 builder.start_node(SyntaxKind::CITATION.into());
467
468 if has_suppress {
470 builder.token(SyntaxKind::CITATION_MARKER.into(), "-@");
471 } else {
472 builder.token(SyntaxKind::CITATION_MARKER.into(), "@");
473 }
474
475 if key.starts_with('{') && key.ends_with('}') {
477 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
478 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
479 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
480 } else {
481 builder.token(SyntaxKind::CITATION_KEY.into(), key);
482 }
483
484 builder.finish_node();
485}
486
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_citation_key ------------------------------------------------

    #[test]
    fn test_parse_simple_citation_key() {
        for (input, expected) in [("doe99", 5), ("smith2000", 9)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_with_internal_punct() {
        for (input, expected) in [("Foo_bar.baz", 11), ("author:2020", 11)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_trailing_punct() {
        // Trailing punctuation is not part of the key.
        for (input, expected) in [("Foo_bar.baz.", 11), ("key:value:", 9)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_double_punct() {
        // Two punctuation characters in a row end the key before the first one.
        assert_eq!(parse_citation_key("Foo_bar--baz"), Some(7));
    }

    #[test]
    fn test_parse_citation_key_with_braces() {
        for (input, expected) in [("{https://example.com}", 21), ("{Foo_bar.baz.}", 14)] {
            assert_eq!(parse_citation_key(input), Some(expected));
        }
    }

    #[test]
    fn test_parse_citation_key_invalid_start() {
        for input in [".invalid", ":invalid"] {
            assert_eq!(parse_citation_key(input), None);
        }
    }

    #[test]
    fn test_parse_citation_key_stops_at_space() {
        assert_eq!(parse_citation_key("key rest"), Some(3));
    }

    // ---- try_parse_bare_citation -------------------------------------------

    #[test]
    fn test_parse_bare_citation_simple() {
        assert_eq!(
            try_parse_bare_citation("@doe99"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_suppress() {
        assert_eq!(
            try_parse_bare_citation("-@smith04"),
            Some((9, "smith04", true))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_trailing_text() {
        assert_eq!(
            try_parse_bare_citation("@doe99 says"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_braced_key() {
        assert_eq!(
            try_parse_bare_citation("@{https://example.com}"),
            Some((22, "{https://example.com}", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_not_citation() {
        for input in ["not a citation", "@"] {
            assert_eq!(try_parse_bare_citation(input), None);
        }
    }

    // ---- try_parse_bracketed_citation --------------------------------------

    #[test]
    fn test_parse_bracketed_citation_simple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99]"),
            Some((8, "@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_multiple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99; @smith2000]"),
            Some((20, "@doe99; @smith2000"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_prefix() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99]"),
            Some((12, "see @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_locator() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99, pp. 33-35]"),
            Some((19, "@doe99, pp. 33-35"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_complex() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99, pp. 33-35 and *passim*]"),
            Some((36, "see @doe99, pp. 33-35 and *passim*"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_suppress() {
        assert_eq!(
            try_parse_bracketed_citation("[-@doe99]"),
            Some((9, "-@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_not_citation() {
        // Brackets without any `@` inside are not citations.
        for input in ["[text](url)", "[just text]"] {
            assert_eq!(try_parse_bracketed_citation(input), None);
        }
    }

    #[test]
    fn test_parse_bracketed_citation_nested_brackets() {
        assert_eq!(
            try_parse_bracketed_citation("[see [nested] @doe99]"),
            Some((21, "see [nested] @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_bracket() {
        assert_eq!(
            try_parse_bracketed_citation(r"[@doe99 with \] escaped]"),
            Some((24, r"@doe99 with \] escaped"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_paren_in_prefix() {
        assert_eq!(
            try_parse_bracketed_citation("[see (Smith 1999) and @doe99]"),
            Some((29, "see (Smith 1999) and @doe99"))
        );
    }

    // ---- code spans inside brackets ----------------------------------------

    #[test]
    fn test_bracketed_citation_ignores_at_in_code_span() {
        // The only `@` sits inside a code span, so this is not a citation.
        assert_eq!(try_parse_bracketed_citation("[`@foo`]"), None);
    }

    #[test]
    fn test_bracketed_citation_code_span_in_prefix() {
        let parsed = try_parse_bracketed_citation("[`x@y` @doe99]");
        assert_eq!(parsed, Some((14, "`x@y` @doe99")));
    }

    #[test]
    fn test_bracketed_citation_bracket_in_code_span() {
        // A `]` inside a code span does not close the citation.
        let parsed = try_parse_bracketed_citation("[`a]b` @doe99]");
        assert_eq!(parsed, Some((14, "`a]b` @doe99")));
    }

    #[test]
    fn test_bracketed_citation_unterminated_backtick() {
        // An unterminated backtick is ordinary text, so the `@` still counts.
        let parsed = try_parse_bracketed_citation("[`@foo bar]");
        assert_eq!(parsed, Some((11, "`@foo bar")));
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_at_in_prefix() {
        assert_eq!(
            try_parse_bracketed_citation(r"[see also \@ref(svm) and @bischl_applied_2024]"),
            Some((46, r"see also \@ref(svm) and @bischl_applied_2024"))
        );
    }
}