panache_parser/parser/inlines/
citations.rs1use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
/// Tries to parse a bracketed citation such as `[see @doe99, p. 3]` at the start of `text`.
///
/// Returns `(consumed, content)` where `consumed` is the byte length up to and including the
/// closing `]` and `content` is the text between the outer brackets. Returns `None` when
/// `text` does not start with `[`, when the opening bracket is never closed, or when no
/// unescaped `@` occurs before the closing bracket.
///
/// A backslash skips the byte that follows it, so `\]`, `\[` and `\@` are never treated as
/// structure. Nested `[...]` pairs are balanced before the closing bracket is accepted.
pub(crate) fn try_parse_bracketed_citation(text: &str) -> Option<(usize, &str)> {
    let bytes = text.as_bytes();
    if bytes.first() != Some(&b'[') {
        return None;
    }

    // The opening bracket at index 0 accounts for the initial depth of one.
    let mut depth: usize = 1;
    let mut saw_at = false;
    let mut cursor = 1;

    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'\\' => {
                // Skip the escape and the byte it protects.
                cursor += 2;
                continue;
            }
            b'@' => saw_at = true,
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    // `cursor` sits on an ASCII `]`, so slicing here is on a char boundary.
                    return if saw_at {
                        Some((cursor + 1, &text[1..cursor]))
                    } else {
                        None
                    };
                }
            }
            _ => {}
        }
        cursor += 1;
    }

    // Ran out of input before the outer bracket was closed.
    None
}
97
98pub(crate) fn try_parse_bare_citation(text: &str) -> Option<(usize, &str, bool)> {
103 let bytes = text.as_bytes();
104
105 if bytes.is_empty() {
106 return None;
107 }
108
109 let mut pos = 0;
110 let has_suppress = bytes[pos] == b'-';
111
112 if has_suppress {
113 pos += 1;
114 if pos >= bytes.len() {
115 return None;
116 }
117 }
118
119 if bytes[pos] != b'@' {
121 return None;
122 }
123 pos += 1;
124
125 if pos >= bytes.len() {
126 return None;
127 }
128
129 let key_start = pos;
131 let key_len = parse_citation_key(&text[pos..])?;
132
133 if key_len == 0 {
134 return None;
135 }
136
137 let total_len = pos + key_len;
138 let key = &text[key_start..total_len];
139
140 Some((total_len, key, has_suppress))
141}
142
/// Returns `true` when `key` has the form `<prefix>-<rest>` with a recognized prefix.
///
/// The prefix is everything before the first `-` and is compared ASCII case-insensitively
/// against the table below. The part after the first `-` must be non-empty, so `fig-` and
/// `fig` are both rejected.
pub fn is_quarto_crossref_key(key: &str) -> bool {
    const KNOWN_PREFIXES: &[&str] = &[
        "fig", "tbl", "lst", "tip", "nte", "wrn", "imp", "cau", "thm", "lem", "cor", "prp",
        "cnj", "def", "exm", "exr", "sol", "rem", "alg", "eq", "sec",
    ];

    match key.split_once('-') {
        Some((prefix, rest)) if !rest.is_empty() => KNOWN_PREFIXES
            .iter()
            .any(|known| prefix.eq_ignore_ascii_case(known)),
        _ => false,
    }
}
177
/// Label prefixes accepted by [`is_bookdown_label`] and [`has_bookdown_prefix`].
///
/// Entries are matched exactly (case-sensitively). The grouping below is only for
/// readability; the order of the entries is unchanged.
pub const BOOKDOWN_LABEL_PREFIXES: &[&str] = &[
    // Presumably equations, figures and tables.
    "eq", "fig", "tab",
    // Presumably theorem-like environments.
    "thm", "lem", "cor", "prp", "cnj", "def", "exm", "exr", "sol", "rem", "alg",
    // Remaining prefixes.
    "sec", "hyp",
];
182
183pub fn is_bookdown_label(label: &str) -> bool {
184 BOOKDOWN_LABEL_PREFIXES.contains(&label)
185}
186
187pub fn has_bookdown_prefix(label: &str) -> bool {
188 let mut parts = label.splitn(2, ':');
189 let prefix = parts.next().unwrap_or("");
190 let rest = parts.next().unwrap_or("");
191 if rest.is_empty() {
192 return false;
193 }
194 is_bookdown_label(prefix)
195}
196
197pub(crate) fn emit_crossref(builder: &mut GreenNodeBuilder, key: &str, has_suppress: bool) {
198 builder.start_node(SyntaxKind::CROSSREF.into());
199
200 if has_suppress {
201 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "-@");
202 } else {
203 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "@");
204 }
205
206 if key.starts_with('{') && key.ends_with('}') {
207 builder.token(SyntaxKind::CROSSREF_BRACE_OPEN.into(), "{");
208 builder.token(SyntaxKind::CROSSREF_KEY.into(), &key[1..key.len() - 1]);
209 builder.token(SyntaxKind::CROSSREF_BRACE_CLOSE.into(), "}");
210 } else {
211 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
212 }
213
214 builder.finish_node();
215}
216
217pub(crate) fn emit_bookdown_crossref(builder: &mut GreenNodeBuilder, key: &str) {
218 builder.start_node(SyntaxKind::CROSSREF.into());
219 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_OPEN.into(), "\\@ref(");
220 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
221 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_CLOSE.into(), ")");
222 builder.finish_node();
223}
224
/// Measures the citation key at the start of `text` and returns its length in bytes.
///
/// Two forms are recognized:
///
/// * **Braced** (`{...}`): everything up to and including the first `}` that is not preceded
///   by a backslash escape. Returns `None` if the brace is never closed.
/// * **Plain**: must start with an alphanumeric character or `_`. It continues over
///   alphanumerics, `_`, and *single* internal punctuation characters (see
///   [`is_internal_punctuation`]) that are followed by another word character. Trailing
///   punctuation and the first of two consecutive punctuation characters are excluded, so
///   `Foo_bar.baz.` yields 11 and `Foo_bar--baz` yields 7.
///
/// Returns `None` for empty input or when the first character cannot start a key. A returned
/// length is always at least 1 and always falls on a char boundary.
fn parse_citation_key(text: &str) -> Option<usize> {
    if text.starts_with('{') {
        let mut escaped = false;

        for (idx, ch) in text.char_indices().skip(1) {
            if escaped {
                // The character after a backslash never terminates the key.
                escaped = false;
                continue;
            }

            match ch {
                '\\' => escaped = true,
                '}' => return Some(idx + ch.len_utf8()),
                _ => {}
            }
        }

        return None;
    }

    let is_word_char = |ch: char| ch.is_alphanumeric() || ch == '_';

    let mut chars = text.char_indices();
    // Empty input has no first character and is rejected here.
    let (_, first) = chars.next()?;
    if !is_word_char(first) {
        return None;
    }

    // End of the last word character seen. Punctuation only counts once another word
    // character follows it, so this is the key length at every exit point.
    let mut key_end = first.len_utf8();
    let mut pending_punct = false;

    for (idx, ch) in chars {
        if is_word_char(ch) {
            key_end = idx + ch.len_utf8();
            pending_punct = false;
        } else if is_internal_punctuation(ch) && !pending_punct {
            pending_punct = true;
        } else {
            // Either a second consecutive punctuation character or a non-key character.
            break;
        }
    }

    Some(key_end)
}

/// Returns `true` for punctuation that may appear inside a plain citation key
/// (between two word characters): one of `: . # $ % & - + ? < > ~ /`.
fn is_internal_punctuation(ch: char) -> bool {
    matches!(
        ch,
        ':' | '.' | '#' | '$' | '%' | '&' | '-' | '+' | '?' | '<' | '>' | '~' | '/'
    )
}
312
313pub(crate) fn emit_bracketed_citation(builder: &mut GreenNodeBuilder, content: &str) {
315 builder.start_node(SyntaxKind::CITATION.into());
316
317 builder.token(SyntaxKind::LINK_START.into(), "[");
319
320 emit_bracketed_citation_content(builder, content);
322
323 builder.token(SyntaxKind::LINK_DEST.into(), "]");
325
326 builder.finish_node();
327}
328
329fn emit_bracketed_citation_content(builder: &mut GreenNodeBuilder, content: &str) {
330 let mut text_start = 0;
331 let mut iter = content.char_indices().peekable();
332
333 while let Some((idx, ch)) = iter.next() {
334 if ch == '\\' {
338 iter.next();
339 continue;
340 }
341
342 if ch == '@' || (ch == '-' && matches!(iter.peek(), Some((_, '@')))) {
343 if idx > text_start {
344 builder.token(
345 SyntaxKind::CITATION_CONTENT.into(),
346 &content[text_start..idx],
347 );
348 }
349
350 let mut marker_len = 1;
351 let marker_text = if ch == '-' {
352 iter.next();
353 marker_len = 2;
354 "-@"
355 } else {
356 "@"
357 };
358 builder.token(SyntaxKind::CITATION_MARKER.into(), marker_text);
359
360 let key_start = idx + marker_len;
361 if key_start >= content.len() {
362 text_start = key_start;
363 continue;
364 }
365
366 if let Some(key_len) = parse_citation_key(&content[key_start..]) {
367 let key_end = key_start + key_len;
368 let key = &content[key_start..key_end];
369 if key.starts_with('{') && key.ends_with('}') {
370 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
371 if key.len() > 2 {
372 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
373 }
374 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
375 } else {
376 builder.token(SyntaxKind::CITATION_KEY.into(), key);
377 }
378 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < key_end) {
379 iter.next();
380 }
381 text_start = key_end;
382 continue;
383 }
384
385 text_start = key_start;
386 continue;
387 }
388
389 if ch == ';' {
390 if idx > text_start {
391 builder.token(
392 SyntaxKind::CITATION_CONTENT.into(),
393 &content[text_start..idx],
394 );
395 }
396 builder.token(SyntaxKind::CITATION_SEPARATOR.into(), ";");
397 text_start = idx + ch.len_utf8();
398 continue;
399 }
400 }
401
402 if text_start < content.len() {
403 builder.token(SyntaxKind::CITATION_CONTENT.into(), &content[text_start..]);
404 }
405}
406
407pub(crate) fn emit_bare_citation(builder: &mut GreenNodeBuilder, key: &str, has_suppress: bool) {
409 builder.start_node(SyntaxKind::CITATION.into());
410
411 if has_suppress {
413 builder.token(SyntaxKind::CITATION_MARKER.into(), "-@");
414 } else {
415 builder.token(SyntaxKind::CITATION_MARKER.into(), "@");
416 }
417
418 if key.starts_with('{') && key.ends_with('}') {
420 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
421 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
422 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
423 } else {
424 builder.token(SyntaxKind::CITATION_KEY.into(), key);
425 }
426
427 builder.finish_node();
428}
429
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `parse_citation_key` against a table of `(input, expected)` pairs.
    fn assert_key_lengths(cases: &[(&str, Option<usize>)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_citation_key(input), expected, "input: {input:?}");
        }
    }

    /// Checks `try_parse_bare_citation` against a table of `(input, expected)` pairs.
    fn assert_bare(cases: &[(&str, Option<(usize, &str, bool)>)]) {
        for &(input, expected) in cases {
            assert_eq!(try_parse_bare_citation(input), expected, "input: {input:?}");
        }
    }

    /// Checks `try_parse_bracketed_citation` against a table of `(input, expected)` pairs.
    fn assert_bracketed(cases: &[(&str, Option<(usize, &str)>)]) {
        for &(input, expected) in cases {
            assert_eq!(
                try_parse_bracketed_citation(input),
                expected,
                "input: {input:?}"
            );
        }
    }

    // Citation keys.

    #[test]
    fn test_parse_simple_citation_key() {
        assert_key_lengths(&[("doe99", Some(5)), ("smith2000", Some(9))]);
    }

    #[test]
    fn test_parse_citation_key_with_internal_punct() {
        assert_key_lengths(&[("Foo_bar.baz", Some(11)), ("author:2020", Some(11))]);
    }

    #[test]
    fn test_parse_citation_key_trailing_punct() {
        // Trailing punctuation is not part of the key.
        assert_key_lengths(&[("Foo_bar.baz.", Some(11)), ("key:value:", Some(9))]);
    }

    #[test]
    fn test_parse_citation_key_double_punct() {
        // The key stops before the first of two consecutive punctuation characters.
        assert_key_lengths(&[("Foo_bar--baz", Some(7))]);
    }

    #[test]
    fn test_parse_citation_key_with_braces() {
        assert_key_lengths(&[
            ("{https://example.com}", Some(21)),
            ("{Foo_bar.baz.}", Some(14)),
        ]);
    }

    #[test]
    fn test_parse_citation_key_invalid_start() {
        assert_key_lengths(&[(".invalid", None), (":invalid", None)]);
    }

    #[test]
    fn test_parse_citation_key_stops_at_space() {
        assert_key_lengths(&[("key rest", Some(3))]);
    }

    // Bare citations.

    #[test]
    fn test_parse_bare_citation_simple() {
        assert_bare(&[("@doe99", Some((6, "doe99", false)))]);
    }

    #[test]
    fn test_parse_bare_citation_with_suppress() {
        assert_bare(&[("-@smith04", Some((9, "smith04", true)))]);
    }

    #[test]
    fn test_parse_bare_citation_with_trailing_text() {
        assert_bare(&[("@doe99 says", Some((6, "doe99", false)))]);
    }

    #[test]
    fn test_parse_bare_citation_braced_key() {
        assert_bare(&[(
            "@{https://example.com}",
            Some((22, "{https://example.com}", false)),
        )]);
    }

    #[test]
    fn test_parse_bare_citation_not_citation() {
        assert_bare(&[("not a citation", None), ("@", None)]);
    }

    // Bracketed citations.

    #[test]
    fn test_parse_bracketed_citation_simple() {
        assert_bracketed(&[("[@doe99]", Some((8, "@doe99")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_multiple() {
        assert_bracketed(&[("[@doe99; @smith2000]", Some((20, "@doe99; @smith2000")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_with_prefix() {
        assert_bracketed(&[("[see @doe99]", Some((12, "see @doe99")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_with_locator() {
        assert_bracketed(&[("[@doe99, pp. 33-35]", Some((19, "@doe99, pp. 33-35")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_complex() {
        assert_bracketed(&[(
            "[see @doe99, pp. 33-35 and *passim*]",
            Some((36, "see @doe99, pp. 33-35 and *passim*")),
        )]);
    }

    #[test]
    fn test_parse_bracketed_citation_with_suppress() {
        assert_bracketed(&[("[-@doe99]", Some((9, "-@doe99")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_not_citation() {
        // Brackets without an `@` are not citations.
        assert_bracketed(&[("[text](url)", None), ("[just text]", None)]);
    }

    #[test]
    fn test_parse_bracketed_citation_nested_brackets() {
        assert_bracketed(&[("[see [nested] @doe99]", Some((21, "see [nested] @doe99")))]);
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_bracket() {
        assert_bracketed(&[(
            r"[@doe99 with \] escaped]",
            Some((24, r"@doe99 with \] escaped")),
        )]);
    }

    #[test]
    fn test_parse_bracketed_citation_paren_in_prefix() {
        assert_bracketed(&[(
            "[see (Smith 1999) and @doe99]",
            Some((29, "see (Smith 1999) and @doe99")),
        )]);
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_at_in_prefix() {
        // The escaped `\@` is skipped; the later unescaped `@` makes this a citation.
        assert_bracketed(&[(
            r"[see also \@ref(svm) and @bischl_applied_2024]",
            Some((46, r"see also \@ref(svm) and @bischl_applied_2024")),
        )]);
    }
}