panache_parser/parser/inlines/
citations.rs1use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
/// Attempts to recognise a bracketed citation such as `[@key]` at the start of `text`.
///
/// The bytes after the opening `[` are walked once. A backslash skips the byte
/// that follows it, and nested `[` / `]` pairs are balanced. The input counts as
/// a citation only when an `@` is seen before the closing bracket and before any
/// `(` that sits directly inside the outer brackets.
///
/// Returns the number of bytes consumed (both brackets included) together with
/// the text between the brackets, or `None` when `text` is not a bracketed
/// citation or the bracket is never closed.
pub(crate) fn try_parse_bracketed_citation(text: &str) -> Option<(usize, &str)> {
    let mut bytes = text.bytes().enumerate();

    if !matches!(bytes.next(), Some((_, b'['))) {
        return None;
    }

    // Number of currently open brackets nested inside the outer pair.
    let mut nested = 0usize;
    let mut seen_at = false;

    while let Some((idx, byte)) = bytes.next() {
        match byte {
            // An escape swallows the following byte, whatever it is.
            b'\\' => {
                bytes.next();
            }
            b'[' => nested += 1,
            b']' if nested > 0 => nested -= 1,
            // Closing bracket of the outer pair: only a citation if `@` was seen.
            b']' => return seen_at.then(|| (idx + 1, &text[1..idx])),
            b'@' => seen_at = true,
            // A top-level `(` before any `@` rules the citation out.
            b'(' if nested == 0 && !seen_at => return None,
            _ => {}
        }
    }

    None
}
102
103pub(crate) fn try_parse_bare_citation(text: &str) -> Option<(usize, &str, bool)> {
108 let bytes = text.as_bytes();
109
110 if bytes.is_empty() {
111 return None;
112 }
113
114 let mut pos = 0;
115 let has_suppress = bytes[pos] == b'-';
116
117 if has_suppress {
118 pos += 1;
119 if pos >= bytes.len() {
120 return None;
121 }
122 }
123
124 if bytes[pos] != b'@' {
126 return None;
127 }
128 pos += 1;
129
130 if pos >= bytes.len() {
131 return None;
132 }
133
134 let key_start = pos;
136 let key_len = parse_citation_key(&text[pos..])?;
137
138 if key_len == 0 {
139 return None;
140 }
141
142 let total_len = pos + key_len;
143 let key = &text[key_start..total_len];
144
145 Some((total_len, key, has_suppress))
146}
147
/// Reports whether `key` has the shape `<prefix>-<rest>` with one of the
/// prefixes listed below.
///
/// The prefix is everything before the first `-` and is compared without regard
/// to ASCII case; the part after that `-` must be non-empty.
pub fn is_quarto_crossref_key(key: &str) -> bool {
    const KNOWN_PREFIXES: &[&str] = &[
        "fig", "tbl", "lst", "tip", "nte", "wrn", "imp", "cau", "thm", "lem", "cor", "prp",
        "cnj", "def", "exm", "exr", "sol", "rem", "alg", "eq", "sec",
    ];

    match key.split_once('-') {
        Some((prefix, rest)) if !rest.is_empty() => KNOWN_PREFIXES
            .iter()
            .any(|known| prefix.eq_ignore_ascii_case(known)),
        _ => false,
    }
}
182
/// Label prefixes that [`is_bookdown_label`] accepts.
///
/// The order of the entries is part of the public value and is kept as is.
pub const BOOKDOWN_LABEL_PREFIXES: &[&str] = &[
    "eq", "fig", "tab", "thm",
    "lem", "cor", "prp", "cnj",
    "def", "exm", "exr", "sol",
    "rem", "alg", "sec", "hyp",
];
187
188pub fn is_bookdown_label(label: &str) -> bool {
189 BOOKDOWN_LABEL_PREFIXES.contains(&label)
190}
191
192pub fn has_bookdown_prefix(label: &str) -> bool {
193 let mut parts = label.splitn(2, ':');
194 let prefix = parts.next().unwrap_or("");
195 let rest = parts.next().unwrap_or("");
196 if rest.is_empty() {
197 return false;
198 }
199 is_bookdown_label(prefix)
200}
201
202pub(crate) fn emit_crossref(builder: &mut GreenNodeBuilder, key: &str, has_suppress: bool) {
203 builder.start_node(SyntaxKind::CROSSREF.into());
204
205 if has_suppress {
206 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "-@");
207 } else {
208 builder.token(SyntaxKind::CROSSREF_MARKER.into(), "@");
209 }
210
211 if key.starts_with('{') && key.ends_with('}') {
212 builder.token(SyntaxKind::CROSSREF_BRACE_OPEN.into(), "{");
213 builder.token(SyntaxKind::CROSSREF_KEY.into(), &key[1..key.len() - 1]);
214 builder.token(SyntaxKind::CROSSREF_BRACE_CLOSE.into(), "}");
215 } else {
216 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
217 }
218
219 builder.finish_node();
220}
221
222pub(crate) fn emit_bookdown_crossref(builder: &mut GreenNodeBuilder, key: &str) {
223 builder.start_node(SyntaxKind::CROSSREF.into());
224 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_OPEN.into(), "\\@ref(");
225 builder.token(SyntaxKind::CROSSREF_KEY.into(), key);
226 builder.token(SyntaxKind::CROSSREF_BOOKDOWN_CLOSE.into(), ")");
227 builder.finish_node();
228}
229
/// Measures, in bytes, the citation key found at the start of `text`.
///
/// Two forms are recognised:
///
/// * **Braced** — `text` starts with `{`. The key runs through the first `}`
///   that is not preceded by a backslash escape, braces included. `None` is
///   returned when no such `}` exists.
/// * **Plain** — the first character must be alphanumeric or `_`. The key then
///   continues over alphanumerics, `_`, and *single* internal punctuation
///   characters (see [`is_internal_punctuation`]). Punctuation only belongs to
///   the key when a key character follows it, so trailing punctuation is left
///   out and a doubled punctuation character ends the key before the first one.
///
/// Returns `None` for empty input or when the first character cannot start a key.
fn parse_citation_key(text: &str) -> Option<usize> {
    if text.starts_with('{') {
        let mut escaped = false;
        for (idx, ch) in text.char_indices().skip(1) {
            if escaped {
                // The escaped character can never close the key.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '}' {
                return Some(idx + ch.len_utf8());
            }
        }
        return None;
    }

    let mut chars = text.char_indices();
    let (_, first) = chars.next()?;
    if !first.is_alphanumeric() && first != '_' {
        return None;
    }

    // End of the key so far; it only ever advances past a key character, so a
    // punctuation character is included implicitly once a key character follows.
    let mut key_end = first.len_utf8();
    let mut pending_punct = false;

    for (idx, ch) in chars {
        if ch.is_alphanumeric() || ch == '_' {
            key_end = idx + ch.len_utf8();
            pending_punct = false;
        } else if is_internal_punctuation(ch) && !pending_punct {
            pending_punct = true;
        } else {
            // Either a character that cannot be part of a key, or a second
            // punctuation character in a row: the key ended at `key_end`.
            break;
        }
    }

    Some(key_end)
}

/// Reports whether `ch` is one of the punctuation characters that may appear
/// singly inside a plain citation key: `: . # $ % & - + ? < > ~ /`.
fn is_internal_punctuation(ch: char) -> bool {
    const INTERNAL_PUNCTUATION: &str = ":.#$%&-+?<>~/";
    INTERNAL_PUNCTUATION.contains(ch)
}
317
318pub(crate) fn emit_bracketed_citation(builder: &mut GreenNodeBuilder, content: &str) {
320 builder.start_node(SyntaxKind::CITATION.into());
321
322 builder.token(SyntaxKind::LINK_START.into(), "[");
324
325 emit_bracketed_citation_content(builder, content);
327
328 builder.token(SyntaxKind::LINK_DEST.into(), "]");
330
331 builder.finish_node();
332}
333
334fn emit_bracketed_citation_content(builder: &mut GreenNodeBuilder, content: &str) {
335 let mut text_start = 0;
336 let mut iter = content.char_indices().peekable();
337
338 while let Some((idx, ch)) = iter.next() {
339 if ch == '@' || (ch == '-' && matches!(iter.peek(), Some((_, '@')))) {
340 if idx > text_start {
341 builder.token(
342 SyntaxKind::CITATION_CONTENT.into(),
343 &content[text_start..idx],
344 );
345 }
346
347 let mut marker_len = 1;
348 let marker_text = if ch == '-' {
349 iter.next();
350 marker_len = 2;
351 "-@"
352 } else {
353 "@"
354 };
355 builder.token(SyntaxKind::CITATION_MARKER.into(), marker_text);
356
357 let key_start = idx + marker_len;
358 if key_start >= content.len() {
359 text_start = key_start;
360 continue;
361 }
362
363 if let Some(key_len) = parse_citation_key(&content[key_start..]) {
364 let key_end = key_start + key_len;
365 let key = &content[key_start..key_end];
366 if key.starts_with('{') && key.ends_with('}') {
367 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
368 if key.len() > 2 {
369 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
370 }
371 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
372 } else {
373 builder.token(SyntaxKind::CITATION_KEY.into(), key);
374 }
375 while matches!(iter.peek(), Some((next_idx, _)) if *next_idx < key_end) {
376 iter.next();
377 }
378 text_start = key_end;
379 continue;
380 }
381
382 text_start = key_start;
383 continue;
384 }
385
386 if ch == ';' {
387 if idx > text_start {
388 builder.token(
389 SyntaxKind::CITATION_CONTENT.into(),
390 &content[text_start..idx],
391 );
392 }
393 builder.token(SyntaxKind::CITATION_SEPARATOR.into(), ";");
394 text_start = idx + ch.len_utf8();
395 continue;
396 }
397 }
398
399 if text_start < content.len() {
400 builder.token(SyntaxKind::CITATION_CONTENT.into(), &content[text_start..]);
401 }
402}
403
404pub(crate) fn emit_bare_citation(builder: &mut GreenNodeBuilder, key: &str, has_suppress: bool) {
406 builder.start_node(SyntaxKind::CITATION.into());
407
408 if has_suppress {
410 builder.token(SyntaxKind::CITATION_MARKER.into(), "-@");
411 } else {
412 builder.token(SyntaxKind::CITATION_MARKER.into(), "@");
413 }
414
415 if key.starts_with('{') && key.ends_with('}') {
417 builder.token(SyntaxKind::CITATION_BRACE_OPEN.into(), "{");
418 builder.token(SyntaxKind::CITATION_KEY.into(), &key[1..key.len() - 1]);
419 builder.token(SyntaxKind::CITATION_BRACE_CLOSE.into(), "}");
420 } else {
421 builder.token(SyntaxKind::CITATION_KEY.into(), key);
422 }
423
424 builder.finish_node();
425}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts `parse_citation_key` on each `(input, expected)` pair.
    fn check_key_lengths(cases: &[(&str, Option<usize>)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_citation_key(input), expected, "input: {:?}", input);
        }
    }

    // ---- parse_citation_key ----

    #[test]
    fn test_parse_simple_citation_key() {
        check_key_lengths(&[("doe99", Some(5)), ("smith2000", Some(9))]);
    }

    #[test]
    fn test_parse_citation_key_with_internal_punct() {
        check_key_lengths(&[("Foo_bar.baz", Some(11)), ("author:2020", Some(11))]);
    }

    #[test]
    fn test_parse_citation_key_trailing_punct() {
        // Trailing punctuation is left out of the key.
        check_key_lengths(&[("Foo_bar.baz.", Some(11)), ("key:value:", Some(9))]);
    }

    #[test]
    fn test_parse_citation_key_double_punct() {
        // The key stops before the first of two consecutive punctuation characters.
        check_key_lengths(&[("Foo_bar--baz", Some(7))]);
    }

    #[test]
    fn test_parse_citation_key_with_braces() {
        check_key_lengths(&[
            ("{https://example.com}", Some(21)),
            ("{Foo_bar.baz.}", Some(14)),
        ]);
    }

    #[test]
    fn test_parse_citation_key_invalid_start() {
        check_key_lengths(&[(".invalid", None), (":invalid", None)]);
    }

    #[test]
    fn test_parse_citation_key_stops_at_space() {
        check_key_lengths(&[("key rest", Some(3))]);
    }

    // ---- try_parse_bare_citation ----

    #[test]
    fn test_parse_bare_citation_simple() {
        assert_eq!(
            try_parse_bare_citation("@doe99"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_suppress() {
        assert_eq!(
            try_parse_bare_citation("-@smith04"),
            Some((9, "smith04", true))
        );
    }

    #[test]
    fn test_parse_bare_citation_with_trailing_text() {
        assert_eq!(
            try_parse_bare_citation("@doe99 says"),
            Some((6, "doe99", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_braced_key() {
        assert_eq!(
            try_parse_bare_citation("@{https://example.com}"),
            Some((22, "{https://example.com}", false))
        );
    }

    #[test]
    fn test_parse_bare_citation_not_citation() {
        for input in ["not a citation", "@"] {
            assert_eq!(try_parse_bare_citation(input), None, "input: {:?}", input);
        }
    }

    // ---- try_parse_bracketed_citation ----

    #[test]
    fn test_parse_bracketed_citation_simple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99]"),
            Some((8, "@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_multiple() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99; @smith2000]"),
            Some((20, "@doe99; @smith2000"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_prefix() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99]"),
            Some((12, "see @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_locator() {
        assert_eq!(
            try_parse_bracketed_citation("[@doe99, pp. 33-35]"),
            Some((19, "@doe99, pp. 33-35"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_complex() {
        assert_eq!(
            try_parse_bracketed_citation("[see @doe99, pp. 33-35 and *passim*]"),
            Some((36, "see @doe99, pp. 33-35 and *passim*"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_with_suppress() {
        assert_eq!(
            try_parse_bracketed_citation("[-@doe99]"),
            Some((9, "-@doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_not_citation() {
        for input in ["[text](url)", "[just text]"] {
            assert_eq!(
                try_parse_bracketed_citation(input),
                None,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_parse_bracketed_citation_nested_brackets() {
        assert_eq!(
            try_parse_bracketed_citation("[see [nested] @doe99]"),
            Some((21, "see [nested] @doe99"))
        );
    }

    #[test]
    fn test_parse_bracketed_citation_escaped_bracket() {
        assert_eq!(
            try_parse_bracketed_citation(r"[@doe99 with \] escaped]"),
            Some((24, r"@doe99 with \] escaped"))
        );
    }
}